--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Apr 02 09:30:07 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Apr 03 11:12:57 2013 -0700
@@ -4765,6 +4765,31 @@
pop_CPU_state();
}
+void MacroAssembler::restore_cpu_control_state_after_jni() {
+ // If RestoreMXCSROnJNICalls is set, reload MXCSR from its standard value after the JNI call;
+ // otherwise, if CheckJNICalls is set (-Xcheck:jni), call the stub that verifies MXCSR.
+ if (VM_Version::supports_sse()) {
+ if (RestoreMXCSROnJNICalls) {
+ ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std()));
+ } else if (CheckJNICalls) {
+ call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
+ }
+ }
+ if (VM_Version::supports_avx()) {
+ // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
+ vzeroupper();
+ }
+
+#ifndef _LP64
+ // Non-LP64 builds only: if CheckJNICalls is set, call the stub that checks the
+ // x87 floating-point control word after returning from the JNI call.
+ if (CheckJNICalls) {
+ call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
+ }
+#endif // !_LP64
+}
+
+
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
if (UseCompressedKlassPointers) {
@@ -5759,6 +5784,8 @@
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
+ // clean upper bits of YMM registers
+ vzeroupper();
// compare wide vectors tail
bind(COMPARE_WIDE_TAIL);
@@ -5772,6 +5799,8 @@
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
+ // clean upper bits of YMM registers
+ vzeroupper();
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
jmp(COMPARE_16_CHARS);
@@ -6028,6 +6057,10 @@
// That's it
bind(DONE);
+ if (UseAVX >= 2) {
+ // clean upper bits of YMM registers
+ vzeroupper();
+ }
}
void MacroAssembler::generate_fill(BasicType t, bool aligned,
@@ -6157,6 +6190,10 @@
vmovdqu(Address(to, 0), xtmp);
addptr(to, 32);
subl(count, 8 << shift);
+
+ BIND(L_check_fill_8_bytes);
+ // clean upper bits of YMM registers
+ vzeroupper();
} else {
// Fill 32-byte chunks
pshufd(xtmp, xtmp, 0);
@@ -6180,8 +6217,9 @@
addptr(to, 32);
subl(count, 8 << shift);
jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+
+ BIND(L_check_fill_8_bytes);
}
- BIND(L_check_fill_8_bytes);
addl(count, 8 << shift);
jccb(Assembler::zero, L_exit);
jmpb(L_fill_8_bytes);
@@ -6316,6 +6354,10 @@
jccb(Assembler::lessEqual, L_copy_16_chars);
bind(L_copy_16_chars_exit);
+ if (UseAVX >= 2) {
+ // clean upper bits of YMM registers
+ vzeroupper();
+ }
subptr(len, 8);
jccb(Assembler::greater, L_copy_8_chars_exit);