6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
Summary: Use SSE4.2 and AVX2 instructions for encodeArray intrinsic.
Reviewed-by: roland
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -2263,6 +2263,25 @@
emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}
+// Pack with unsigned saturation: VEX-encoded three-operand form of packuswb
+// (same 0x67 opcode with the 66 prefix). The 128-bit form needs AVX, the
+// 256-bit form needs AVX2.
+void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+// Permutation of 64bit words (VPERMQ): imm8 selects which source word is
+// copied to each destination word. VPERMQ is an AVX2 instruction, so assert
+// the CPU feature like the neighbouring emitters do instead of silently
+// emitting an instruction the CPU cannot execute.
+void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
+  assert(VM_Version::supports_avx2(), "VPERMQ requires AVX2");
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
+  emit_int8(0x00);            // opcode byte
+  emit_int8(0xC0 | encode);   // ModRM byte, register-direct form
+  emit_int8(imm8);            // word selector
+}
+
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
@@ -2475,7 +2494,7 @@
assert(dst != xnoreg, "sanity");
int dst_enc = dst->encoding();
// swap src<->dst for encoding
- vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+ vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
emit_int8(0x17);
emit_operand(dst, src);
}
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -1395,6 +1395,10 @@
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
+ void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+
+ // Permutation of 64bit words
+ void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -6209,6 +6209,128 @@
}
BIND(L_exit);
}
+
+// Encode char[] to byte[] in ISO_8859_1: copies chars <= 0xFF as bytes, stops at the first char > 0xFF and leaves the number of chars encoded in 'result'.
+void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
+  XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+  XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+  Register tmp5, Register result) {
+  // rsi: src     - start of the chars to encode; clobbered (advanced past the last char)
+  // rdi: dst     - start of the output bytes; clobbered (advanced past the last byte)
+  // rdx: len     - number of chars; clobbered (negated, then counted up towards zero)
+  // rcx: tmp5    - scratch
+  // rax: result  - out: number of chars encoded
+  ShortBranchVerifier sbv(this);
+  assert_different_registers(src, dst, len, tmp5, result);
+  Label L_done, L_copy_1_char, L_copy_1_char_exit;
+
+  // set result
+  xorl(result, result);
+  // check for zero length
+  testl(len, len);
+  jcc(Assembler::zero, L_done);
+  movl(result, len);
+
+  // Setup pointers: src/dst point past the end, len becomes a negative index
+  lea(src, Address(src, len, Address::times_2)); // char[]
+  lea(dst, Address(dst, len, Address::times_1)); // byte[]
+  negptr(len);
+
+  if (UseSSE42Intrinsics || UseAVX >= 2) {
+    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
+    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
+
+    if (UseAVX >= 2) {
+      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
+      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
+      movdl(tmp1Reg, tmp5);
+      vpbroadcastd(tmp1Reg, tmp1Reg);
+      jmpb(L_chars_32_check);
+
+      bind(L_copy_32_chars);    // 32 chars per iteration
+      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
+      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
+      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
+      jccb(Assembler::notZero, L_copy_32_chars_exit);
+      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true); // vpackuswb packs per 128-bit lane; put the bytes back in order
+      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
+
+      bind(L_chars_32_check);
+      addptr(len, 32);
+      jccb(Assembler::lessEqual, L_copy_32_chars);
+
+      bind(L_copy_32_chars_exit);
+      subptr(len, 16);          // len = next unprocessed index + 16
+      jccb(Assembler::greater, L_copy_16_chars_exit);
+
+    } else if (UseSSE42Intrinsics) {
+      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
+      jmpb(L_chars_16_check);
+    }
+
+    bind(L_copy_16_chars);      // 16 chars per iteration
+    if (UseAVX >= 2) {
+      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
+      vptest(tmp2Reg, tmp1Reg);
+      jccb(Assembler::notZero, L_copy_16_chars_exit);
+      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
+      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true); // gather the 16 result bytes into the low 128 bits
+    } else {
+      if (UseAVX > 0) {
+        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
+        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
+        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
+      } else {
+        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
+        movdqu(tmp2Reg, tmp3Reg);     // copy, do not OR: tmp2Reg is a TEMP whose old contents would cause spurious bail-outs
+        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
+        por(tmp2Reg, tmp4Reg);
+      }
+      ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
+      jccb(Assembler::notZero, L_copy_16_chars_exit);
+      packuswb(tmp3Reg, tmp4Reg);
+    }
+    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);
+
+    bind(L_chars_16_check);
+    addptr(len, 16);
+    jccb(Assembler::lessEqual, L_copy_16_chars);
+
+    bind(L_copy_16_chars_exit);
+    subptr(len, 8);             // len = next unprocessed index + 8
+    jccb(Assembler::greater, L_copy_8_chars_exit);
+
+    bind(L_copy_8_chars);       // 8 chars per iteration
+    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
+    ptest(tmp3Reg, tmp1Reg);
+    jccb(Assembler::notZero, L_copy_8_chars_exit);
+    packuswb(tmp3Reg, tmp1Reg);
+    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
+    addptr(len, 8);
+    jccb(Assembler::lessEqual, L_copy_8_chars);
+
+    bind(L_copy_8_chars_exit);
+    subptr(len, 8);             // back to the plain negative index used by the scalar loop
+    jccb(Assembler::zero, L_done);
+  }
+
+  bind(L_copy_1_char);          // scalar tail, also the only loop without SSE4.2/AVX2
+  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
+  testl(tmp5, 0xff00);      // check if Unicode char
+  jccb(Assembler::notZero, L_copy_1_char_exit);
+  movb(Address(dst, len, Address::times_1, 0), tmp5);
+  addptr(len, 1);
+  jccb(Assembler::less, L_copy_1_char);
+
+  bind(L_copy_1_char_exit);
+  addptr(result, len); // len is negative count of not processed elements
+  bind(L_done);
+}
+
#undef BIND
#undef BLOCK_COMMENT
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -1135,6 +1135,10 @@
Register to, Register value, Register count,
Register rtmp, XMMRegister xtmp);
+ void encode_iso_array(Register src, Register dst, Register len,
+ XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
+ XMMRegister tmp4, Register tmp5, Register result);
+
#undef VIRTUAL
};
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Tue Jan 22 15:34:16 2013 -0800
@@ -11687,6 +11687,23 @@
ins_pipe( pipe_slow );
%}
+// encode char[] to byte[] in ISO_8859_1: matches the EncodeISOArray node and emits encode_iso_array; src, dst and len are USE_KILL, tmp5 and the flags are KILL
+instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
+ regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+ eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
+ match(Set result (EncodeISOArray src (Binary dst len)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+ format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
+ ins_encode %{
+ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+ $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Tue Jan 22 15:34:16 2013 -0800
@@ -10495,6 +10495,23 @@
ins_pipe( pipe_slow );
%}
+// encode char[] to byte[] in ISO_8859_1: matches the EncodeISOArray node and emits encode_iso_array; src, dst and len are USE_KILL, tmp5 and the flags are KILL
+instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
+ regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+ rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
+ match(Set result (EncodeISOArray src (Binary dst len)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+ format %{ "Encode array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
+ ins_encode %{
+ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+ $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
--- a/hotspot/src/share/vm/adlc/formssel.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/adlc/formssel.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -862,8 +862,10 @@
( strcmp(_matrule->_rChild->_opType,"AryEq" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrComp" )==0 ||
strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
- strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) {
+ strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 ||
+ strcmp(_matrule->_rChild->_opType,"EncodeISOArray")==0)) {
// String.(compareTo/equals/indexOf) and Arrays.equals
+ // and sun.nio.cs.ISO_8859_1$Encoder.encodeISOArray
// take 1 control and 1 memory edges.
return 2;
}
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -735,6 +735,11 @@
do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \
do_name( checkIndex_name, "checkIndex") \
\
+ do_class(sun_nio_cs_iso8859_1_Encoder, "sun/nio/cs/ISO_8859_1$Encoder") \
+ do_intrinsic(_encodeISOArray, sun_nio_cs_iso8859_1_Encoder, encodeISOArray_name, encodeISOArray_signature, F_S) \
+ do_name( encodeISOArray_name, "encodeISOArray") \
+ do_signature(encodeISOArray_signature, "([CI[BII)I") \
+ \
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -516,6 +516,9 @@
develop(bool, SpecialArraysEquals, true, \
"special version of Arrays.equals(char[],char[])") \
\
+ product(bool, SpecialEncodeISOArray, true, \
+ "special version of ISO_8859_1$Encoder.encodeISOArray") \
+ \
develop(bool, BailoutToInterpreterForThrows, false, \
"Compiled methods which throws/catches exceptions will be " \
"deopt and intp.") \
--- a/hotspot/src/share/vm/opto/classes.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/classes.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -127,6 +127,7 @@
macro(DivMod)
macro(DivModI)
macro(DivModL)
+macro(EncodeISOArray)
macro(EncodeP)
macro(EncodePKlass)
macro(ExpD)
--- a/hotspot/src/share/vm/opto/escape.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/escape.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -523,7 +523,8 @@
case Op_AryEq:
case Op_StrComp:
case Op_StrEquals:
- case Op_StrIndexOf: {
+ case Op_StrIndexOf:
+ case Op_EncodeISOArray: {
add_local_var(n, PointsToNode::ArgEscape);
delayed_worklist->push(n); // Process it later.
break;
@@ -701,7 +702,8 @@
case Op_AryEq:
case Op_StrComp:
case Op_StrEquals:
- case Op_StrIndexOf: {
+ case Op_StrIndexOf:
+ case Op_EncodeISOArray: {
// char[] arrays passed to string intrinsic do not escape but
// they are not scalar replaceable. Adjust escape state for them.
// Start from in(2) edge since in(1) is memory edge.
@@ -2581,15 +2583,22 @@
}
// Otherwise skip it (the call updated 'result' value).
} else if (result->Opcode() == Op_SCMemProj) {
- assert(result->in(0)->is_LoadStore(), "sanity");
- const Type *at = igvn->type(result->in(0)->in(MemNode::Address));
+ Node* mem = result->in(0);
+ Node* adr = NULL;
+ if (mem->is_LoadStore()) {
+ adr = mem->in(MemNode::Address);
+ } else {
+ assert(mem->Opcode() == Op_EncodeISOArray, "sanity");
+ adr = mem->in(3); // Memory edge corresponds to destination array
+ }
+ const Type *at = igvn->type(adr);
if (at != Type::TOP) {
assert (at->isa_ptr() != NULL, "pointer type required.");
int idx = C->get_alias_index(at->is_ptr());
assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field");
break;
}
- result = result->in(0)->in(MemNode::Memory);
+ result = mem->in(MemNode::Memory);
}
}
if (result->is_Phi()) {
@@ -2927,6 +2936,11 @@
if (m->is_MergeMem()) {
assert(_mergemem_worklist.contains(m->as_MergeMem()), "EA: missing MergeMem node in the worklist");
}
+ } else if (use->Opcode() == Op_EncodeISOArray) {
+ if (use->in(MemNode::Memory) == n || use->in(3) == n) {
+ // EncodeISOArray overwrites destination array
+ memnode_worklist.append_if_missing(use);
+ }
} else {
uint op = use->Opcode();
if (!(op == Op_CmpP || op == Op_Conv2B ||
@@ -2962,6 +2976,16 @@
n = n->as_MemBar()->proj_out(TypeFunc::Memory);
if (n == NULL)
continue;
+ } else if (n->Opcode() == Op_EncodeISOArray) {
+ // get the memory projection
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (use->Opcode() == Op_SCMemProj) {
+ n = use;
+ break;
+ }
+ }
+ assert(n->Opcode() == Op_SCMemProj, "memory projection required");
} else {
assert(n->is_Mem(), "memory node required.");
Node *addr = n->in(MemNode::Address);
@@ -2999,7 +3023,7 @@
Node *use = n->fast_out(i);
if (use->is_Phi() || use->is_ClearArray()) {
memnode_worklist.append_if_missing(use);
- } else if(use->is_Mem() && use->in(MemNode::Memory) == n) {
+ } else if (use->is_Mem() && use->in(MemNode::Memory) == n) {
if (use->Opcode() == Op_StoreCM) // Ignore cardmark stores
continue;
memnode_worklist.append_if_missing(use);
@@ -3010,6 +3034,11 @@
assert(use->in(MemNode::Memory) != n, "EA: missing memory path");
} else if (use->is_MergeMem()) {
assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
+ } else if (use->Opcode() == Op_EncodeISOArray) {
+ if (use->in(MemNode::Memory) == n || use->in(3) == n) {
+ // EncodeISOArray overwrites destination array
+ memnode_worklist.append_if_missing(use);
+ }
} else {
uint op = use->Opcode();
if (!(op == Op_StoreCM ||
--- a/hotspot/src/share/vm/opto/lcm.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/lcm.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -175,6 +175,7 @@
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
+ case Op_EncodeISOArray:
// Not a legit memory op for implicit null check regardless of
// embedded loads
continue;
--- a/hotspot/src/share/vm/opto/library_call.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/library_call.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -290,6 +290,7 @@
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
+ bool inline_encodeISOArray();
};
@@ -381,6 +382,10 @@
// These also use the arraycopy intrinsic mechanism:
if (!InlineArrayCopy) return NULL;
break;
+ case vmIntrinsics::_encodeISOArray:
+ if (!SpecialEncodeISOArray) return NULL;
+ if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return NULL;
+ break;
case vmIntrinsics::_checkIndex:
// We do not intrinsify this. The optimizer does fine with it.
return NULL;
@@ -799,6 +804,9 @@
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+ case vmIntrinsics::_encodeISOArray:
+ return inline_encodeISOArray();
+
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -5368,6 +5376,47 @@
src_start, dest_start, copy_length XTOP);
}
+//-------------inline_encodeISOArray-----------------------------------
+// Intrinsic for encodeISOArray (signature ([CI[BII)I): encode char[] to byte[] in ISO_8859_1
+bool LibraryCallKit::inline_encodeISOArray() {
+  assert(callee()->signature()->size() == 5, "encodeISOArray has 5 parameters");
+  // Static method, so argument(0) is the first declared parameter (no receiver).
+  Node* char_ary    = argument(0);
+  Node* char_offset = argument(1);
+  Node* byte_ary    = argument(2);
+  Node* byte_offset = argument(3);
+  Node* count       = argument(4);
+
+  // Both arguments need a known array type with a klass, otherwise do not intrinsify.
+  const TypeAryPtr* char_ary_type = char_ary->Value(&_gvn)->isa_aryptr();
+  const TypeAryPtr* byte_ary_type = byte_ary->Value(&_gvn)->isa_aryptr();
+  if (char_ary_type == NULL || char_ary_type->klass() == NULL) {
+    return false;  // failed array check
+  }
+  if (byte_ary_type == NULL || byte_ary_type->klass() == NULL) {
+    return false;  // failed array check
+  }
+
+  // Only the char[] -> byte[] combination is handled.
+  BasicType src_elem = char_ary_type->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType dst_elem = byte_ary_type->klass()->as_array_klass()->element_type()->basic_type();
+  if (!(src_elem == T_CHAR && dst_elem == T_BYTE)) {
+    return false;
+  }
+
+  // Element addresses: array + scaled offset.
+  Node* src_start = array_element_address(char_ary, char_offset, src_elem);
+  Node* dst_start = array_element_address(byte_ary, byte_offset, dst_elem);
+
+  // Create the node on the byte[] memory slice and install its memory projection.
+  const TypeAryPtr* mtype = TypeAryPtr::BYTES;
+  Node* enc = _gvn.transform(new (C) EncodeISOArrayNode(control(), memory(mtype), src_start, dst_start, count));
+  Node* res_mem = _gvn.transform(new (C) SCMemProjNode(enc));
+  set_memory(res_mem, mtype);
+  set_result(enc);
+  return true;
+}
+
//----------------------------inline_reference_get----------------------------
// public T java.lang.ref.Reference.get();
bool LibraryCallKit::inline_reference_get() {
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -613,6 +613,7 @@
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
+ case Op_EncodeISOArray:
case Op_AryEq: {
return false;
}
@@ -717,6 +718,7 @@
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
+ case Op_EncodeISOArray:
case Op_AryEq: {
// Do not unroll a loop with String intrinsics code.
// String intrinsics are large and have loops.
--- a/hotspot/src/share/vm/opto/macro.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/macro.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -361,14 +361,21 @@
}
// Otherwise skip it (the call updated 'mem' value).
} else if (mem->Opcode() == Op_SCMemProj) {
- assert(mem->in(0)->is_LoadStore(), "sanity");
- const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
+ mem = mem->in(0);
+ Node* adr = NULL;
+ if (mem->is_LoadStore()) {
+ adr = mem->in(MemNode::Address);
+ } else {
+ assert(mem->Opcode() == Op_EncodeISOArray, "sanity");
+ adr = mem->in(3); // Destination array
+ }
+ const TypePtr* atype = adr->bottom_type()->is_ptr();
int adr_idx = Compile::current()->get_alias_index(atype);
if (adr_idx == alias_idx) {
assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
return NULL;
}
- mem = mem->in(0)->in(MemNode::Memory);
+ mem = mem->in(MemNode::Memory);
} else {
return mem;
}
@@ -445,7 +452,7 @@
}
values.at_put(j, val);
} else if (val->Opcode() == Op_SCMemProj) {
- assert(val->in(0)->is_LoadStore(), "sanity");
+ assert(val->in(0)->is_LoadStore() || val->in(0)->Opcode() == Op_EncodeISOArray, "sanity");
assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
return NULL;
} else {
--- a/hotspot/src/share/vm/opto/matcher.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/matcher.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -919,6 +919,7 @@
case Op_AryEq:
case Op_MemBarVolatile:
case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
+ case Op_EncodeISOArray:
nidx = Compile::AliasIdxTop;
nat = NULL;
break;
@@ -1982,6 +1983,7 @@
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
+ case Op_EncodeISOArray:
set_shared(n); // Force result into register (it will be anyways)
break;
case Op_ConP: { // Convert pointers above the centerline to NUL
@@ -2183,6 +2185,13 @@
n->del_req(4);
break;
}
+ case Op_EncodeISOArray: {
+ // Restructure into a binary tree for Matching.
+ Node* pair = new (C) BinaryNode(n->in(3), n->in(4));
+ n->set_req(3, pair);
+ n->del_req(4);
+ break;
+ }
default:
break;
}
--- a/hotspot/src/share/vm/opto/memnode.cpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/memnode.cpp Tue Jan 22 15:34:16 2013 -0800
@@ -2797,6 +2797,26 @@
}
//=============================================================================
+//------------------------------match_edge-------------------------------------
+// Do not match memory edge: only inputs 2 and 3 take part in matching
+uint EncodeISOArrayNode::match_edge(uint idx) const {
+ return idx == 2 || idx == 3; // EncodeISOArray src (Binary dst len)
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *EncodeISOArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return remove_dead_region(phase, can_reshape) ? this : NULL; // this if remove_dead_region() reported a change, else NULL
+}
+
+//------------------------------Value------------------------------------------
+const Type *EncodeISOArrayNode::Value(PhaseTransform *phase) const {
+ if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP; // control input has type TOP, so the result is TOP as well
+ return bottom_type();
+}
+
+//=============================================================================
MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
: MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
_adr_type(C->get_adr_type(alias_idx))
--- a/hotspot/src/share/vm/opto/memnode.hpp Tue Jan 22 11:31:25 2013 -0800
+++ b/hotspot/src/share/vm/opto/memnode.hpp Tue Jan 22 15:34:16 2013 -0800
@@ -888,6 +888,22 @@
virtual const Type* bottom_type() const { return TypeInt::BOOL; }
};
+
+//------------------------------EncodeISOArray--------------------------------
+// encode char[] to byte[] in ISO_8859_1; the node's own value is an int (see bottom_type)
+class EncodeISOArrayNode: public Node {
+public:
+ EncodeISOArrayNode(Node *control, Node* arymem, Node* s1, Node* s2, Node* c): Node(control, arymem, s1, s2, c) {}; // inputs in order: control, memory, source, destination, count
+ virtual int Opcode() const;
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type* bottom_type() const { return TypeInt::INT; }
+ virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } // NOTE(review): reports TypePtr::BOTTOM although the intrinsic is created on the byte[] memory slice - confirm this is intended
+ virtual uint match_edge(uint idx) const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value(PhaseTransform *phase) const;
+};
+
//------------------------------MemBar-----------------------------------------
// There are different flavors of Memory Barriers to match the Java Memory
// Model. Monitor-enter and volatile-load act as Aquires: no following ref
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6896617/Test6896617.java Tue Jan 22 15:34:16 2013 -0800
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6896617
+ * @summary Optimize sun.nio.cs.ISO_8859_1$Encoder.encodeArrayLoop() with SSE instructions on x86
+ * @run main/othervm/timeout=1200 -Xbatch -Xmx256m Test6896617
+ *
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class Test6896617 {
+ final static int SIZE = 256;
+
+ public static void main(String[] args) {
+ String csn = "ISO-8859-1";
+ Charset cs = Charset.forName(csn);
+ CharsetEncoder enc = cs.newEncoder();
+ enc.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
+ CharsetDecoder dec = cs.newDecoder();
+ dec.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+ byte repl = (byte)'?';
+ enc.replaceWith(new byte[] { repl });
+
+ // Use internal API for tests.
+ sun.nio.cs.ArrayEncoder arrenc = (sun.nio.cs.ArrayEncoder)enc;
+ sun.nio.cs.ArrayDecoder arrdec = (sun.nio.cs.ArrayDecoder)dec;
+
+ // Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF)
+ Random rnd = new Random(0);
+ int maxchar = 0xFF;
+ char[] a = new char[SIZE];
+ byte[] b = new byte[SIZE];
+ char[] at = new char[SIZE];
+ byte[] bt = new byte[SIZE];
+ for (int i = 0; i < SIZE; i++) {
+ char c = (char) rnd.nextInt(maxchar);
+ if (!enc.canEncode(c)) {
+ System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
+ System.exit(97);
+ }
+ a[i] = c;
+ b[i] = (byte)c;
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+ if (arrenc.encode(a, 0, SIZE, bt) != SIZE || !Arrays.equals(b, bt)) {
+ System.out.println("Something wrong: ArrayEncoder.encode failed");
+ System.exit(97);
+ }
+ if (arrdec.decode(b, 0, SIZE, at) != SIZE || !Arrays.equals(a, at)) {
+ System.out.println("Something wrong: ArrayDecoder.decode failed");
+ System.exit(97);
+ }
+ for (int i = 0; i < SIZE; i++) {
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+
+ ByteBuffer bb = ByteBuffer.wrap(b);
+ CharBuffer ba = CharBuffer.wrap(a);
+ ByteBuffer bbt = ByteBuffer.wrap(bt);
+ CharBuffer bat = CharBuffer.wrap(at);
+ if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) {
+ System.out.println("Something wrong: Encoder.encode failed");
+ System.exit(97);
+ }
+ if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) {
+ System.out.println("Something wrong: Decoder.decode failed");
+ System.exit(97);
+ }
+ for (int i = 0; i < SIZE; i++) {
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+
+ // Warm up
+ boolean failed = false;
+ int result = 0;
+ for (int i = 0; i < 10000; i++) {
+ result += arrenc.encode(a, 0, SIZE, bt);
+ result -= arrdec.decode(b, 0, SIZE, at);
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += arrenc.encode(a, 0, SIZE, bt);
+ result -= arrdec.decode(b, 0, SIZE, at);
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += arrenc.encode(a, 0, SIZE, bt);
+ result -= arrdec.decode(b, 0, SIZE, at);
+ }
+ if (result != 0 || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
+ failed = true;
+ System.out.println("Failed: ArrayEncoder.encode char[" + SIZE + "] and ArrayDecoder.decode byte[" + SIZE + "]");
+ }
+ for (int i = 0; i < SIZE; i++) {
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+
+ boolean is_underflow = true;
+ for (int i = 0; i < 10000; i++) {
+ ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+ boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+ boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+ is_underflow = is_underflow && enc_res && dec_res;
+ }
+ for (int i = 0; i < SIZE; i++) {
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+ for (int i = 0; i < 10000; i++) {
+ ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+ boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+ boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+ is_underflow = is_underflow && enc_res && dec_res;
+ }
+ for (int i = 0; i < SIZE; i++) {
+ at[i] = (char)-1;
+ bt[i] = (byte)-1;
+ }
+ for (int i = 0; i < 10000; i++) {
+ ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+ boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+ boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+ is_underflow = is_underflow && enc_res && dec_res;
+ }
+ if (!is_underflow || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
+ failed = true;
+ System.out.println("Failed: Encoder.encode char[" + SIZE + "] and Decoder.decode byte[" + SIZE + "]");
+ }
+
+ // Test encoder with different source and destination sizes
+ System.out.println("Testing different source and destination sizes");
+ for (int i = 1; i <= SIZE; i++) {
+ for (int j = 1; j <= SIZE; j++) {
+ bt = new byte[j];
+ // vary source's SIZE
+ result = arrenc.encode(a, 0, i, bt);
+ int l = Math.min(i, j);
+ if (result != l) {
+ failed = true;
+ System.out.println("Failed: encode char[" + i + "] to byte[" + j + "]: result = " + result + ", expected " + l);
+ }
+ for (int k = 0; k < l; k++) {
+ if (bt[k] != b[k]) {
+ failed = true;
+ System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
+ }
+ }
+ // vary source's offset
+ int sz = SIZE - i + 1;
+ result = arrenc.encode(a, i-1, sz, bt);
+ l = Math.min(sz, j);
+ if (result != l) {
+ failed = true;
+ System.out.println("Failed: encode char[" + sz + "] to byte[" + j + "]: result = " + result + ", expected " + l);
+ }
+ for (int k = 0; k < l; k++) {
+ if (bt[k] != b[i+k-1]) {
+ failed = true;
+ System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[i+k-1]);
+ }
+ }
+ }
+ }
+
+ // Test encoder with char > 0xFF
+ System.out.println("Testing big char");
+
+ byte orig = (byte)'A';
+ bt = new byte[SIZE];
+ for (int i = 1; i <= SIZE; i++) {
+ for (int j = 0; j < i; j++) {
+ a[j] += 0x100;
+ // make sure to replace a different byte
+ bt[j] = orig;
+ result = arrenc.encode(a, 0, i, bt);
+ if (result != i) {
+ failed = true;
+ System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: result = " + result + ", expected " + i);
+ }
+ if (bt[j] != repl) {
+ failed = true;
+ System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl);
+ }
+ bt[j] = b[j]; // Restore to compare whole array
+ for (int k = 0; k < i; k++) {
+ if (bt[k] != b[k]) {
+ failed = true;
+ System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
+ }
+ }
+ a[j] -= 0x100; // Restore
+ }
+ }
+
+ // Test sun.nio.cs.ISO_8859_1$Encoder.encodeArrayLoop() performance.
+
+ int itrs = Integer.getInteger("iterations", 1000000);
+ int size = Integer.getInteger("size", 256);
+ a = new char[size];
+ b = new byte[size];
+ bt = new byte[size];
+ for (int i = 0; i < size; i++) {
+ char c = (char) rnd.nextInt(maxchar);
+ if (!enc.canEncode(c)) {
+ System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
+ System.exit(97);
+ }
+ a[i] = c;
+ b[i] = (byte)-1;
+ bt[i] = (byte)c;
+ }
+ ba = CharBuffer.wrap(a);
+ bb = ByteBuffer.wrap(b);
+ boolean enc_res = enc.encode(ba, bb, true).isUnderflow();
+ if (!enc_res || !Arrays.equals(b, bt)) {
+ failed = true;
+ System.out.println("Failed 1: Encoder.encode char[" + size + "]");
+ }
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+
+ // Make sure to recompile method if needed before performance run.
+ for (int i = 0; i < 10000; i++) {
+ ba.clear(); bb.clear();
+ enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+ }
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+ for (int i = 0; i < 10000; i++) {
+ ba.clear(); bb.clear();
+ enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+ }
+ if (!enc_res || !Arrays.equals(b, bt)) {
+ failed = true;
+ System.out.println("Failed 2: Encoder.encode char[" + size + "]");
+ }
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+
+ System.out.println("Testing ISO_8859_1$Encode.encodeArrayLoop() performance");
+ long start = System.currentTimeMillis();
+ for (int i = 0; i < itrs; i++) {
+ ba.clear(); bb.clear();
+ enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+ }
+ long end = System.currentTimeMillis();
+ if (!enc_res || !Arrays.equals(b, bt)) {
+ failed = true;
+ System.out.println("Failed 3: Encoder.encode char[" + size + "]");
+ } else {
+ System.out.println("size: " + size + " time: " + (end - start));
+ }
+
+ // Test sun.nio.cs.ISO_8859_1$Encoder.encode() performance.
+
+ // Make sure to recompile method if needed before performance run.
+ result = 0;
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += arrenc.encode(a, 0, size, b);
+ }
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += arrenc.encode(a, 0, size, b);
+ }
+ if (result != size*20000 || !Arrays.equals(b, bt)) {
+ failed = true;
+ System.out.println("Failed 1: ArrayEncoder.encode char[" + SIZE + "]");
+ }
+ for (int i = 0; i < size; i++) {
+ b[i] = (byte)-1;
+ }
+
+ System.out.println("Testing ISO_8859_1$Encode.encode() performance");
+ result = 0;
+ start = System.currentTimeMillis();
+ for (int i = 0; i < itrs; i++) {
+ result += arrenc.encode(a, 0, size, b);
+ }
+ end = System.currentTimeMillis();
+ if (!Arrays.equals(b, bt)) {
+ failed = true;
+ System.out.println("Failed 2: ArrayEncoder.encode char[" + size + "]");
+ } else {
+ System.out.println("size: " + size + " time: " + (end - start));
+ }
+
+ if (failed) {
+ System.out.println("FAILED");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+}