# HG changeset patch
# User zmajo
# Date 1448288179 -3600
# Node ID abe570308c14cc980aa5ef426bd51721bf238c47
# Parent  ac3017b4c33691a0bb836e4485ed5c1060128528
# Parent  a5f1c458b56e246edad8ba3dbf20d6a640b9ceeb
Merge

diff -r ac3017b4c336 -r abe570308c14 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Nov 23 15:09:45 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Nov 23 15:16:19 2015 +0100
@@ -7387,7 +7387,8 @@
                                     XMMRegister vec, Register tmp, int ae) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
   // This method uses the pcmpestri instruction with bound registers
@@ -7565,7 +7566,8 @@
                                     XMMRegister vec, Register tmp, int ae) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
   //
@@ -7882,7 +7884,8 @@
 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
   int stride = 8;
@@ -7919,36 +7922,32 @@
     pshufd(vec1, vec1, 0);
     pxor(vec2, vec2);
   }
-  if (UseAVX >= 2 || UseSSE42Intrinsics) {
-    bind(SCAN_TO_8_CHAR);
-    cmpl(cnt1, stride);
-    if (UseAVX >= 2) {
-      jccb(Assembler::less, SCAN_TO_CHAR);
-    }
-    if (!(UseAVX >= 2)) {
-      jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
-      movdl(vec1, ch);
-      pshuflw(vec1, vec1, 0x00);
-      pshufd(vec1, vec1, 0);
-      pxor(vec2, vec2);
-    }
-    movl(tmp, cnt1);
-    andl(tmp, 0xFFFFFFF8); //vector count (in chars)
-    andl(cnt1,0x00000007); //tail count (in chars)
-
-    bind(SCAN_TO_8_CHAR_LOOP);
-    movdqu(vec3, Address(result, 0));
-    pcmpeqw(vec3, vec1);
-    ptest(vec2, vec3);
-    jcc(Assembler::carryClear, FOUND_CHAR);
-    addptr(result, 16);
-    subl(tmp, stride);
-    jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
-  }
+  bind(SCAN_TO_8_CHAR);
+  cmpl(cnt1, stride);
+  if (UseAVX >= 2) {
+    jccb(Assembler::less, SCAN_TO_CHAR);
+  } else {
+    jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
+    movdl(vec1, ch);
+    pshuflw(vec1, vec1, 0x00);
+    pshufd(vec1, vec1, 0);
+    pxor(vec2, vec2);
+  }
+  movl(tmp, cnt1);
+  andl(tmp, 0xFFFFFFF8); //vector count (in chars)
+  andl(cnt1,0x00000007); //tail count (in chars)
+
+  bind(SCAN_TO_8_CHAR_LOOP);
+  movdqu(vec3, Address(result, 0));
+  pcmpeqw(vec3, vec1);
+  ptest(vec2, vec3);
+  jcc(Assembler::carryClear, FOUND_CHAR);
+  addptr(result, 16);
+  subl(tmp, stride);
+  jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
   bind(SCAN_TO_CHAR);
   testl(cnt1, cnt1);
   jcc(Assembler::zero, RET_NOT_FOUND);
-
   bind(SCAN_TO_CHAR_LOOP);
   load_unsigned_short(tmp, Address(result, 0));
   cmpl(ch, tmp);
@@ -7962,16 +7961,14 @@
   movl(result, -1);
   jmpb(DONE_LABEL);
-  if (UseAVX >= 2 || UseSSE42Intrinsics) {
-    bind(FOUND_CHAR);
-    if (UseAVX >= 2) {
-      vpmovmskb(tmp, vec3);
-    } else {
-      pmovmskb(tmp, vec3);
-    }
-    bsfl(ch, tmp);
-    addl(result, ch);
-  }
+  bind(FOUND_CHAR);
+  if (UseAVX >= 2) {
+    vpmovmskb(tmp, vec3);
+  } else {
+    pmovmskb(tmp, vec3);
+  }
+  bsfl(ch, tmp);
+  addl(result, ch);
   bind(FOUND_SEQ_CHAR);
   subptr(result, str1);
@@ -8060,6 +8057,7 @@
   }
   if (UseAVX >= 2 && UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
     Label COMPARE_TAIL_LONG;
@@ -8195,6 +8193,7 @@
     bind(COMPARE_SMALL_STR);
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
     int pcmpmask = 0x19;
     // Setup to compare 8-char (16-byte) vectors,
@@ -8327,7 +8326,7 @@
   movl(result, len); // copy
-  if (UseAVX >= 2) {
+  if (UseAVX >= 2 && UseSSE >= 2) {
     // With AVX2, use 32-byte vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
@@ -8362,6 +8361,7 @@
     movl(len, result);
     // Fallthru to tail compare
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     // With SSE4.2, use double quad vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
@@ -8438,7 +8438,7 @@
   // That's it
   bind(DONE);
-  if (UseAVX >= 2) {
+  if (UseAVX >= 2 && UseSSE >= 2) {
     // clean upper bits of YMM registers
    vpxor(vec1, vec1);
    vpxor(vec2, vec2);
@@ -8526,6 +8526,7 @@
     movl(limit, result);
     // Fallthru to tail compare
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     // With SSE4.2, use double quad vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
@@ -8875,6 +8876,7 @@
   negptr(len);
   if (UseSSE42Intrinsics || UseAVX >= 2) {
+    assert(UseSSE42Intrinsics ? UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
     Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
@@ -10647,6 +10649,7 @@
   push(len);
   if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label copy_32_loop, copy_16, copy_tail;
     movl(result, len);
@@ -10746,6 +10749,7 @@
   assert_different_registers(src, dst, len, tmp2);
   if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label copy_8_loop, copy_bytes, copy_tail;
     movl(tmp2, len);
diff -r ac3017b4c336 -r abe570308c14 hotspot/src/cpu/x86/vm/vm_version_x86.cpp
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Nov 23 15:09:45 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Nov 23 15:16:19 2015 +0100
@@ -930,10 +930,15 @@
         UseXmmI2D = false;
       }
     }
-    if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) {
-      if( supports_sse4_2() && UseSSE >= 4 ) {
-        UseSSE42Intrinsics = true;
+    if (supports_sse4_2() && UseSSE >= 4) {
+      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
       }
+    } else {
+      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+      }
+      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
     }
     // some defaults for AMD family 15h
@@ -1007,8 +1012,13 @@
     }
     if (supports_sse4_2() && UseSSE >= 4) {
       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
-        UseSSE42Intrinsics = true;
+        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
       }
+    } else {
+      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); } } if ((cpu_family() == 0x06) &&