hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
changeset 16624 9dbd4b210bf9
parent 16368 713209c45a82
child 18507 61bfc8995bb3
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Apr 02 09:30:07 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Apr 03 11:12:57 2013 -0700
@@ -4765,6 +4765,31 @@
   pop_CPU_state();
 }
 
+void MacroAssembler::restore_cpu_control_state_after_jni() {
+  // Either restore the MXCSR register after returning from the JNI call
+  // or verify that it wasn't changed (with -Xcheck:jni flag).
+  if (VM_Version::supports_sse()) {
+    if (RestoreMXCSROnJNICalls) {
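+      // reload the VM's standard MXCSR setting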
+      ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std()));
+    } else if (CheckJNICalls) {
+      call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
+    }
+  }
+  if (VM_Version::supports_avx()) {
+    // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
+    vzeroupper();
+  }
+
+#ifndef _LP64
+  // Either restore the x87 floating point control word after returning
+  // from the JNI call or verify that it wasn't changed.
+  if (CheckJNICalls) {
+    call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
+  }
+#endif // _LP64
+}
+
+
 void MacroAssembler::load_klass(Register dst, Register src) {
 #ifdef _LP64
   if (UseCompressedKlassPointers) {
@@ -5759,6 +5784,8 @@
     addptr(result, stride2);
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
+    // clean upper bits of YMM registers
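+    // (avoids an SSE <-> AVX transition penalty in the tail compare that follows)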
+    vzeroupper();
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -5772,6 +5799,8 @@
 
     // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
+    // clean upper bits of YMM registers
+    vzeroupper();
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6028,6 +6057,10 @@
 
   // That's it
   bind(DONE);
+  if (UseAVX >= 2) {
+    // clean upper bits of YMM registers
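+    // (only the UseAVX >= 2 paths above use the YMM registers)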
+    vzeroupper();
+  }
 }
 
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
@@ -6157,6 +6190,10 @@
         vmovdqu(Address(to, 0), xtmp);
         addptr(to, 32);
         subl(count, 8 << shift);
+
+        BIND(L_check_fill_8_bytes);
+        // clean upper bits of YMM registers
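+        // before falling through to the 8-byte fill tail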
+        vzeroupper();
       } else {
         // Fill 32-byte chunks
         pshufd(xtmp, xtmp, 0);
@@ -6180,8 +6217,9 @@
         addptr(to, 32);
         subl(count, 8 << shift);
         jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+
+        BIND(L_check_fill_8_bytes);
       }
-      BIND(L_check_fill_8_bytes);
       addl(count, 8 << shift);
       jccb(Assembler::zero, L_exit);
       jmpb(L_fill_8_bytes);
@@ -6316,6 +6354,10 @@
     jccb(Assembler::lessEqual, L_copy_16_chars);
 
     bind(L_copy_16_chars_exit);
+    if (UseAVX >= 2) {
+      // clean upper bits of YMM registers
+      vzeroupper();
+    }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);