8076373: In 32-bit VM interpreter and compiled code process NaN values differently
Summary: Change interpreter to use XMM registers on x86_32 if they are available. Add stubs for methods transforming from/to int/long float/double.
Reviewed-by: kvn, mcberg
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -1674,6 +1674,13 @@
emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
}
+void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
+ emit_int8(0x2A);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
@@ -6604,13 +6611,6 @@
emit_operand(dst, src);
}
-void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
- emit_int8(0x2A);
- emit_int8((unsigned char)(0xC0 | encode));
-}
-
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
if (VM_Version::supports_evex()) {
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -355,8 +355,8 @@
case ctos: // fall through
case stos: // fall through
case itos: movl(rax, val_addr); break;
- case ftos: movflt(xmm0, val_addr); break;
- case dtos: movdbl(xmm0, val_addr); break;
+ case ftos: load_float(val_addr); break;
+ case dtos: load_double(val_addr); break;
case vtos: /* nothing to do */ break;
default : ShouldNotReachHere();
}
@@ -376,8 +376,8 @@
case ctos: // fall through
case stos: // fall through
case itos: movl(rax, val_addr); break;
- case ftos: fld_s(val_addr); break;
- case dtos: fld_d(val_addr); break;
+ case ftos: load_float(val_addr); break;
+ case dtos: load_double(val_addr); break;
case vtos: /* nothing to do */ break;
default : ShouldNotReachHere();
}
@@ -578,6 +578,26 @@
push(r);
}
+void InterpreterMacroAssembler::push_f(XMMRegister r) {
+ subptr(rsp, wordSize);
+ movflt(Address(rsp, 0), r);
+}
+
+void InterpreterMacroAssembler::pop_f(XMMRegister r) {
+ movflt(r, Address(rsp, 0));
+ addptr(rsp, wordSize);
+}
+
+void InterpreterMacroAssembler::push_d(XMMRegister r) {
+ subptr(rsp, 2 * wordSize);
+ movdbl(Address(rsp, 0), r);
+}
+
+void InterpreterMacroAssembler::pop_d(XMMRegister r) {
+ movdbl(r, Address(rsp, 0));
+ addptr(rsp, 2 * Interpreter::stackElementSize);
+}
+
#ifdef _LP64
void InterpreterMacroAssembler::pop_i(Register r) {
// XXX can't use pop currently, upper half non clean
@@ -590,31 +610,11 @@
addptr(rsp, 2 * Interpreter::stackElementSize);
}
-void InterpreterMacroAssembler::pop_f(XMMRegister r) {
- movflt(r, Address(rsp, 0));
- addptr(rsp, wordSize);
-}
-
-void InterpreterMacroAssembler::pop_d(XMMRegister r) {
- movdbl(r, Address(rsp, 0));
- addptr(rsp, 2 * Interpreter::stackElementSize);
-}
-
void InterpreterMacroAssembler::push_l(Register r) {
subptr(rsp, 2 * wordSize);
movq(Address(rsp, 0), r);
}
-void InterpreterMacroAssembler::push_f(XMMRegister r) {
- subptr(rsp, wordSize);
- movflt(Address(rsp, 0), r);
-}
-
-void InterpreterMacroAssembler::push_d(XMMRegister r) {
- subptr(rsp, 2 * wordSize);
- movdbl(Address(rsp, 0), r);
-}
-
void InterpreterMacroAssembler::pop(TosState state) {
switch (state) {
case atos: pop_ptr(); break;
@@ -623,8 +623,8 @@
case stos:
case itos: pop_i(); break;
case ltos: pop_l(); break;
- case ftos: pop_f(); break;
- case dtos: pop_d(); break;
+ case ftos: pop_f(xmm0); break;
+ case dtos: pop_d(xmm0); break;
case vtos: /* nothing to do */ break;
default: ShouldNotReachHere();
}
@@ -640,8 +640,8 @@
case stos:
case itos: push_i(); break;
case ltos: push_l(); break;
- case ftos: push_f(); break;
- case dtos: push_d(); break;
+ case ftos: push_f(xmm0); break;
+ case dtos: push_d(xmm0); break;
case vtos: /* nothing to do */ break;
default : ShouldNotReachHere();
}
@@ -675,8 +675,20 @@
case stos: // fall through
case itos: pop_i(rax); break;
case ltos: pop_l(rax, rdx); break;
- case ftos: pop_f(); break;
- case dtos: pop_d(); break;
+ case ftos:
+ if (UseSSE >= 1) {
+ pop_f(xmm0);
+ } else {
+ pop_f();
+ }
+ break;
+ case dtos:
+ if (UseSSE >= 2) {
+ pop_d(xmm0);
+ } else {
+ pop_d();
+ }
+ break;
case vtos: /* nothing to do */ break;
default : ShouldNotReachHere();
}
@@ -695,7 +707,7 @@
fstp_s(Address(rsp, 0));
}
-void InterpreterMacroAssembler::push_d(Register r) {
+void InterpreterMacroAssembler::push_d() {
// Do not schedule for no AGI! Never write beyond rsp!
subptr(rsp, 2 * wordSize);
fstp_d(Address(rsp, 0));
@@ -711,8 +723,20 @@
case stos: // fall through
case itos: push_i(rax); break;
case ltos: push_l(rax, rdx); break;
- case ftos: push_f(); break;
- case dtos: push_d(rax); break;
+ case ftos:
+ if (UseSSE >= 1) {
+ push_f(xmm0);
+ } else {
+ push_f();
+ }
+ break;
+ case dtos:
+ if (UseSSE >= 2) {
+ push_d(xmm0);
+ } else {
+ push_d();
+ }
+ break;
case vtos: /* nothing to do */ break;
default : ShouldNotReachHere();
}
@@ -995,22 +1019,6 @@
leave(); // remove frame anchor
pop(ret_addr); // get return address
mov(rsp, rbx); // set sp to sender sp
-#ifndef _LP64
- if (UseSSE) {
- // float and double are returned in xmm register in SSE-mode
- if (state == ftos && UseSSE >= 1) {
- subptr(rsp, wordSize);
- fstp_s(Address(rsp, 0));
- movflt(xmm0, Address(rsp, 0));
- addptr(rsp, wordSize);
- } else if (state == dtos && UseSSE >= 2) {
- subptr(rsp, 2*wordSize);
- fstp_d(Address(rsp, 0));
- movdbl(xmm0, Address(rsp, 0));
- addptr(rsp, 2*wordSize);
- }
- }
-#endif // _LP64
}
#endif // !CC_INTERP
@@ -1783,7 +1791,10 @@
void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
#ifndef _LP64
- if (state == ftos || state == dtos) MacroAssembler::verify_FPU(stack_depth);
+ if ((state == ftos && UseSSE < 1) ||
+ (state == dtos && UseSSE < 2)) {
+ MacroAssembler::verify_FPU(stack_depth);
+ }
#endif
}
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Wed Aug 19 08:55:18 2015 +0200
@@ -140,20 +140,20 @@
void push_ptr(Register r = rax);
void push_i(Register r = rax);
+ void push_f(XMMRegister r);
+ void pop_f(XMMRegister r);
+ void pop_d(XMMRegister r);
+ void push_d(XMMRegister r);
#ifdef _LP64
void pop_l(Register r = rax);
- void pop_f(XMMRegister r = xmm0);
- void pop_d(XMMRegister r = xmm0);
void push_l(Register r = rax);
- void push_f(XMMRegister r = xmm0);
- void push_d(XMMRegister r = xmm0);
#else
void pop_l(Register lo = rax, Register hi = rdx);
void pop_f();
void pop_d();
void push_l(Register lo = rax, Register hi = rdx);
- void push_d(Register r = rax);
+ void push_d();
void push_f();
#endif // _LP64
--- a/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Wed Aug 19 08:55:18 2015 +0200
@@ -42,6 +42,12 @@
address generate_Reference_get_entry();
address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+#ifndef _LP64
+ address generate_Float_intBitsToFloat_entry();
+ address generate_Float_floatToRawIntBits_entry();
+ address generate_Double_longBitsToDouble_entry();
+ address generate_Double_doubleToRawLongBits_entry();
+#endif
void lock_method(void);
void generate_stack_overflow_check(void);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -3314,6 +3314,42 @@
fincstp();
}
+void MacroAssembler::load_float(Address src) {
+ if (UseSSE >= 1) {
+ movflt(xmm0, src);
+ } else {
+ LP64_ONLY(ShouldNotReachHere());
+ NOT_LP64(fld_s(src));
+ }
+}
+
+void MacroAssembler::store_float(Address dst) {
+ if (UseSSE >= 1) {
+ movflt(dst, xmm0);
+ } else {
+ LP64_ONLY(ShouldNotReachHere());
+ NOT_LP64(fstp_s(dst));
+ }
+}
+
+void MacroAssembler::load_double(Address src) {
+ if (UseSSE >= 2) {
+ movdbl(xmm0, src);
+ } else {
+ LP64_ONLY(ShouldNotReachHere());
+ NOT_LP64(fld_d(src));
+ }
+}
+
+void MacroAssembler::store_double(Address dst) {
+ if (UseSSE >= 2) {
+ movdbl(dst, xmm0);
+ } else {
+ LP64_ONLY(ShouldNotReachHere());
+ NOT_LP64(fstp_d(dst));
+ }
+}
+
void MacroAssembler::fremr(Register tmp) {
save_rax(tmp);
{ Label L;
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Aug 19 08:55:18 2015 +0200
@@ -471,6 +471,22 @@
// Pop ST (ffree & fincstp combined)
void fpop();
+ // Load float value from 'src'. If UseSSE >= 1, the value is loaded into
+ // register xmm0. Otherwise, the value is loaded onto the FPU stack.
+ void load_float(Address src);
+
+ // Store float value to 'dst'. If UseSSE >= 1, the value is stored
+ // from register xmm0. Otherwise, the value is stored from the FPU stack.
+ void store_float(Address dst);
+
+ // Load double value from 'src'. If UseSSE >= 2, the value is loaded into
+ // register xmm0. Otherwise, the value is loaded onto the FPU stack.
+ void load_double(Address src);
+
+ // Store double value to 'dst'. If UseSSE >= 2, the value is stored
+ // from register xmm0. Otherwise, the value is stored from the FPU stack.
+ void store_double(Address dst);
+
// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
void push_fTOS();
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -170,22 +170,12 @@
__ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled");
}
- // In SSE mode, interpreter returns FP results in xmm0 but they need
- // to end up back on the FPU so it can operate on them.
- if (state == ftos && UseSSE >= 1) {
- __ subptr(rsp, wordSize);
- __ movflt(Address(rsp, 0), xmm0);
- __ fld_s(Address(rsp, 0));
- __ addptr(rsp, wordSize);
- } else if (state == dtos && UseSSE >= 2) {
- __ subptr(rsp, 2*wordSize);
- __ movdbl(Address(rsp, 0), xmm0);
- __ fld_d(Address(rsp, 0));
- __ addptr(rsp, 2*wordSize);
+ if (state == ftos) {
+ __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter");
+ } else if (state == dtos) {
+ __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter");
}
- __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_return_entry_for in interpreter");
-
// Restore stack bottom in case i2c adjusted stack
__ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
// and NULL it as marker that rsp is now tos until next java call
@@ -217,21 +207,12 @@
address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
address entry = __ pc();
- // In SSE mode, FP results are in xmm0
- if (state == ftos && UseSSE > 0) {
- __ subptr(rsp, wordSize);
- __ movflt(Address(rsp, 0), xmm0);
- __ fld_s(Address(rsp, 0));
- __ addptr(rsp, wordSize);
- } else if (state == dtos && UseSSE >= 2) {
- __ subptr(rsp, 2*wordSize);
- __ movdbl(Address(rsp, 0), xmm0);
- __ fld_d(Address(rsp, 0));
- __ addptr(rsp, 2*wordSize);
+ if (state == ftos) {
+ __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_deopt_entry_for in interpreter");
+ } else if (state == dtos) {
+ __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter");
}
- __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_deopt_entry_for in interpreter");
-
// The stack is not extended by deopt but we must NULL last_sp as this
// entry is like a "return".
__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
@@ -735,7 +716,7 @@
if (UseCRC32Intrinsics) {
address entry = __ pc();
- // rbx,: Method*
+ // rbx: Method*
// rsi: senderSP must preserved for slow path, set SP to it on fast path
// rdx: scratch
// rdi: scratch
@@ -841,6 +822,124 @@
return generate_native_entry(false);
}
+/**
+ * Method entry for static native method:
+ * java.lang.Float.intBitsToFloat(int bits)
+ */
+address InterpreterGenerator::generate_Float_intBitsToFloat_entry() {
+ address entry;
+
+ if (UseSSE >= 1) {
+ entry = __ pc();
+
+ // rsi: the sender's SP
+
+ // Skip safepoint check (compiler intrinsic versions of this method
+ // do not perform safepoint checks either).
+
+ // Load 'bits' into xmm0 (interpreter returns results in xmm0)
+ __ movflt(xmm0, Address(rsp, wordSize));
+
+ // Return
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set rsp to the sender's SP
+ __ jmp(rdi);
+ } else {
+ entry = generate_native_entry(false);
+ }
+
+ return entry;
+}
+
+/**
+ * Method entry for static native method:
+ * java.lang.Float.floatToRawIntBits(float value)
+ */
+address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
+ address entry;
+
+ if (UseSSE >= 1) {
+ entry = __ pc();
+
+ // rsi: the sender's SP
+
+ // Skip safepoint check (compiler intrinsic versions of this method
+ // do not perform safepoint checks either).
+
+ // Load the parameter (a floating-point value) into rax.
+ __ movl(rax, Address(rsp, wordSize));
+
+ // Return
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set rsp to the sender's SP
+ __ jmp(rdi);
+ } else {
+ entry = generate_native_entry(false);
+ }
+
+ return entry;
+}
+
+
+/**
+ * Method entry for static native method:
+ * java.lang.Double.longBitsToDouble(long bits)
+ */
+address InterpreterGenerator::generate_Double_longBitsToDouble_entry() {
+ address entry;
+
+ if (UseSSE >= 2) {
+ entry = __ pc();
+
+ // rsi: the sender's SP
+
+ // Skip safepoint check (compiler intrinsic versions of this method
+ // do not perform safepoint checks either).
+
+ // Load 'bits' into xmm0 (interpreter returns results in xmm0)
+ __ movdbl(xmm0, Address(rsp, wordSize));
+
+ // Return
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set rsp to the sender's SP
+ __ jmp(rdi);
+ } else {
+ entry = generate_native_entry(false);
+ }
+
+ return entry;
+}
+
+/**
+ * Method entry for static native method:
+ * java.lang.Double.doubleToRawLongBits(double value)
+ */
+address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
+ address entry;
+
+ if (UseSSE >= 2) {
+ entry = __ pc();
+
+ // rsi: the sender's SP
+
+ // Skip safepoint check (compiler intrinsic versions of this method
+ // do not perform safepoint checks either).
+
+ // Load the parameter (a floating-point value) into the rdx:rax register pair.
+ __ movl(rdx, Address(rsp, 2*wordSize));
+ __ movl(rax, Address(rsp, wordSize));
+
+ // Return
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set rsp to the sender's SP
+ __ jmp(rdi);
+ } else {
+ entry = generate_native_entry(false);
+ }
+
+ return entry;
+}
+
//
// Interpreter stub for calling a native method. (asm interpreter)
// This sets up a somewhat different looking stack for calling the native method
@@ -1090,7 +1189,7 @@
double_handler.addr());
__ jcc(Assembler::notEqual, L);
__ bind(push_double);
- __ push(dtos);
+ __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0).
__ bind(L);
}
__ push(ltos);
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -1707,10 +1707,10 @@
address& vep) {
assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
Label L;
- aep = __ pc(); __ push_ptr(); __ jmp(L);
- fep = __ pc(); __ push_f(); __ jmp(L);
- dep = __ pc(); __ push_d(); __ jmp(L);
- lep = __ pc(); __ push_l(); __ jmp(L);
+ aep = __ pc(); __ push_ptr(); __ jmp(L);
+ fep = __ pc(); __ push_f(xmm0); __ jmp(L);
+ dep = __ pc(); __ push_d(xmm0); __ jmp(L);
+ lep = __ pc(); __ push_l(); __ jmp(L);
bep = cep = sep =
iep = __ pc(); __ push_i();
vep = __ pc();
--- a/hotspot/src/cpu/x86/vm/templateTable_x86.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -349,53 +349,60 @@
void TemplateTable::fconst(int value) {
transition(vtos, ftos);
+ if (UseSSE >= 1) {
+ static float one = 1.0f, two = 2.0f;
+ switch (value) {
+ case 0:
+ __ xorps(xmm0, xmm0);
+ break;
+ case 1:
+ __ movflt(xmm0, ExternalAddress((address) &one));
+ break;
+ case 2:
+ __ movflt(xmm0, ExternalAddress((address) &two));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ } else {
#ifdef _LP64
- static float one = 1.0f, two = 2.0f;
- switch (value) {
- case 0:
- __ xorps(xmm0, xmm0);
- break;
- case 1:
- __ movflt(xmm0, ExternalAddress((address) &one));
- break;
- case 2:
- __ movflt(xmm0, ExternalAddress((address) &two));
- break;
- default:
ShouldNotReachHere();
- break;
+#else
+ if (value == 0) { __ fldz();
+ } else if (value == 1) { __ fld1();
+ } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
+ } else { ShouldNotReachHere();
+ }
+#endif // _LP64
}
-#else
- if (value == 0) { __ fldz();
- } else if (value == 1) { __ fld1();
- } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
- } else { ShouldNotReachHere();
- }
-#endif
}
void TemplateTable::dconst(int value) {
transition(vtos, dtos);
+ if (UseSSE >= 2) {
+ static double one = 1.0;
+ switch (value) {
+ case 0:
+ __ xorpd(xmm0, xmm0);
+ break;
+ case 1:
+ __ movdbl(xmm0, ExternalAddress((address) &one));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ } else {
#ifdef _LP64
- static double one = 1.0;
- switch (value) {
- case 0:
- __ xorpd(xmm0, xmm0);
- break;
- case 1:
- __ movdbl(xmm0, ExternalAddress((address) &one));
- break;
- default:
ShouldNotReachHere();
- break;
+#else
+ if (value == 0) { __ fldz();
+ } else if (value == 1) { __ fld1();
+ } else { ShouldNotReachHere();
+ }
+#endif
}
-
-#else
- if (value == 0) { __ fldz();
- } else if (value == 1) { __ fld1();
- } else { ShouldNotReachHere();
- }
-#endif
}
void TemplateTable::bipush() {
@@ -454,8 +461,7 @@
__ jccb(Assembler::notEqual, notFloat);
// ftos
- LP64_ONLY(__ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
- NOT_LP64(__ fld_s( Address(rcx, rbx, Address::times_ptr, base_offset)));
+ __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
__ push(ftos);
__ jmp(Done);
@@ -522,8 +528,7 @@
__ jccb(Assembler::notEqual, Long);
// dtos
- LP64_ONLY(__ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
- NOT_LP64(__ fld_d( Address(rcx, rbx, Address::times_ptr, base_offset)));
+ __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
__ push(dtos);
__ jmpb(Done);
@@ -617,15 +622,13 @@
void TemplateTable::fload() {
transition(vtos, ftos);
locals_index(rbx);
- LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
- NOT_LP64(__ fld_s(faddress(rbx)));
+ __ load_float(faddress(rbx));
}
void TemplateTable::dload() {
transition(vtos, dtos);
locals_index(rbx);
- LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
- NOT_LP64(__ fld_d(daddress(rbx)));
+ __ load_double(daddress(rbx));
}
void TemplateTable::aload() {
@@ -657,15 +660,13 @@
void TemplateTable::wide_fload() {
transition(vtos, ftos);
locals_index_wide(rbx);
- LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
- NOT_LP64(__ fld_s(faddress(rbx)));
+ __ load_float(faddress(rbx));
}
void TemplateTable::wide_dload() {
transition(vtos, dtos);
locals_index_wide(rbx);
- LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
- NOT_LP64(__ fld_d(daddress(rbx)));
+ __ load_double(daddress(rbx));
}
void TemplateTable::wide_aload() {
@@ -726,10 +727,9 @@
// rax: index
// rdx: array
index_check(rdx, rax); // kills rbx
- LP64_ONLY(__ movflt(xmm0, Address(rdx, rax,
- Address::times_4,
- arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
- NOT_LP64(__ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
+ __ load_float(Address(rdx, rax,
+ Address::times_4,
+ arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
}
void TemplateTable::daload() {
@@ -737,10 +737,9 @@
// rax: index
// rdx: array
index_check(rdx, rax); // kills rbx
- LP64_ONLY(__ movdbl(xmm0, Address(rdx, rax,
- Address::times_8,
- arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
- NOT_LP64(__ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
+ __ load_double(Address(rdx, rax,
+ Address::times_8,
+ arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
}
void TemplateTable::aaload() {
@@ -807,14 +806,12 @@
void TemplateTable::fload(int n) {
transition(vtos, ftos);
- LP64_ONLY(__ movflt(xmm0, faddress(n)));
- NOT_LP64(__ fld_s(faddress(n)));
+ __ load_float(faddress(n));
}
void TemplateTable::dload(int n) {
transition(vtos, dtos);
- LP64_ONLY(__ movdbl(xmm0, daddress(n)));
- NOT_LP64(__ fld_d(daddress(n)));
+ __ load_double(daddress(n));
}
void TemplateTable::aload(int n) {
@@ -919,15 +916,13 @@
void TemplateTable::fstore() {
transition(ftos, vtos);
locals_index(rbx);
- LP64_ONLY(__ movflt(faddress(rbx), xmm0));
- NOT_LP64(__ fstp_s(faddress(rbx)));
+ __ store_float(faddress(rbx));
}
void TemplateTable::dstore() {
transition(dtos, vtos);
locals_index(rbx);
- LP64_ONLY(__ movdbl(daddress(rbx), xmm0));
- NOT_LP64(__ fstp_d(daddress(rbx)));
+ __ store_double(daddress(rbx));
}
void TemplateTable::astore() {
@@ -956,7 +951,7 @@
void TemplateTable::wide_fstore() {
#ifdef _LP64
transition(vtos, vtos);
- __ pop_f();
+ __ pop_f(xmm0);
locals_index_wide(rbx);
__ movflt(faddress(rbx), xmm0);
#else
@@ -967,7 +962,7 @@
void TemplateTable::wide_dstore() {
#ifdef _LP64
transition(vtos, vtos);
- __ pop_d();
+ __ pop_d(xmm0);
locals_index_wide(rbx);
__ movdbl(daddress(rbx), xmm0);
#else
@@ -1011,29 +1006,21 @@
void TemplateTable::fastore() {
transition(ftos, vtos);
__ pop_i(rbx);
- // xmm0: value
+ // value is in UseSSE >= 1 ? xmm0 : ST(0)
// rbx: index
// rdx: array
index_check(rdx, rbx); // prefer index in rbx
- LP64_ONLY(__ movflt(Address(rdx, rbx,
- Address::times_4,
- arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
- xmm0));
- NOT_LP64(__ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
+ __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
}
void TemplateTable::dastore() {
transition(dtos, vtos);
__ pop_i(rbx);
- // xmm0: value
+ // value is in UseSSE >= 2 ? xmm0 : ST(0)
// rbx: index
// rdx: array
index_check(rdx, rbx); // prefer index in rbx
- LP64_ONLY(__ movdbl(Address(rdx, rbx,
- Address::times_8,
- arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
- xmm0));
- NOT_LP64(__ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
+ __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
}
void TemplateTable::aastore() {
@@ -1134,14 +1121,12 @@
void TemplateTable::fstore(int n) {
transition(ftos, vtos);
- LP64_ONLY(__ movflt(faddress(n), xmm0));
- NOT_LP64(__ fstp_s(faddress(n)));
+ __ store_float(faddress(n));
}
void TemplateTable::dstore(int n) {
transition(dtos, vtos);
- LP64_ONLY(__ movdbl(daddress(n), xmm0));
- NOT_LP64(__ fstp_d(daddress(n)));
+ __ store_double(daddress(n));
}
@@ -1425,82 +1410,127 @@
void TemplateTable::fop2(Operation op) {
transition(ftos, ftos);
+
+ if (UseSSE >= 1) {
+ switch (op) {
+ case add:
+ __ addss(xmm0, at_rsp());
+ __ addptr(rsp, Interpreter::stackElementSize);
+ break;
+ case sub:
+ __ movflt(xmm1, xmm0);
+ __ pop_f(xmm0);
+ __ subss(xmm0, xmm1);
+ break;
+ case mul:
+ __ mulss(xmm0, at_rsp());
+ __ addptr(rsp, Interpreter::stackElementSize);
+ break;
+ case div:
+ __ movflt(xmm1, xmm0);
+ __ pop_f(xmm0);
+ __ divss(xmm0, xmm1);
+ break;
+ case rem:
+ // On x86_64 platforms the SharedRuntime::frem method is called to perform the
+ // modulo operation. The frem method calls the function
+ // double fmod(double x, double y) in math.h. The documentation of fmod states:
+ // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
+ // (signalling or quiet) is returned.
+ //
+ // On x86_32 platforms the FPU is used to perform the modulo operation. The
+ // reason is that on 32-bit Windows the sign of modulo operations diverges from
+ // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f and not -0.0f).
+ // The fprem instruction used on x86_32 is functionally equivalent to
+ // SharedRuntime::frem in that it returns a NaN.
#ifdef _LP64
- switch (op) {
- case add:
- __ addss(xmm0, at_rsp());
- __ addptr(rsp, Interpreter::stackElementSize);
- break;
- case sub:
- __ movflt(xmm1, xmm0);
- __ pop_f(xmm0);
- __ subss(xmm0, xmm1);
- break;
- case mul:
- __ mulss(xmm0, at_rsp());
- __ addptr(rsp, Interpreter::stackElementSize);
- break;
- case div:
- __ movflt(xmm1, xmm0);
- __ pop_f(xmm0);
- __ divss(xmm0, xmm1);
- break;
- case rem:
- __ movflt(xmm1, xmm0);
- __ pop_f(xmm0);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
- break;
- default:
+ __ movflt(xmm1, xmm0);
+ __ pop_f(xmm0);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
+#else
+ __ push_f(xmm0);
+ __ pop_f();
+ __ fld_s(at_rsp());
+ __ fremr(rax);
+ __ f2ieee();
+ __ pop(rax); // pop second operand off the stack
+ __ push_f();
+ __ pop_f(xmm0);
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ } else {
+#ifdef _LP64
ShouldNotReachHere();
- break;
- }
#else
- switch (op) {
+ switch (op) {
case add: __ fadd_s (at_rsp()); break;
case sub: __ fsubr_s(at_rsp()); break;
case mul: __ fmul_s (at_rsp()); break;
case div: __ fdivr_s(at_rsp()); break;
case rem: __ fld_s (at_rsp()); __ fremr(rax); break;
default : ShouldNotReachHere();
+ }
+ __ f2ieee();
+ __ pop(rax); // pop second operand off the stack
+#endif // _LP64
}
- __ f2ieee();
- __ pop(rax); // pop float thing off
-#endif
}
void TemplateTable::dop2(Operation op) {
transition(dtos, dtos);
+ if (UseSSE >= 2) {
+ switch (op) {
+ case add:
+ __ addsd(xmm0, at_rsp());
+ __ addptr(rsp, 2 * Interpreter::stackElementSize);
+ break;
+ case sub:
+ __ movdbl(xmm1, xmm0);
+ __ pop_d(xmm0);
+ __ subsd(xmm0, xmm1);
+ break;
+ case mul:
+ __ mulsd(xmm0, at_rsp());
+ __ addptr(rsp, 2 * Interpreter::stackElementSize);
+ break;
+ case div:
+ __ movdbl(xmm1, xmm0);
+ __ pop_d(xmm0);
+ __ divsd(xmm0, xmm1);
+ break;
+ case rem:
+ // Similar to fop2(), the modulo operation is performed using the
+ // SharedRuntime::drem method (on x86_64 platforms) or using the
+ // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
#ifdef _LP64
- switch (op) {
- case add:
- __ addsd(xmm0, at_rsp());
- __ addptr(rsp, 2 * Interpreter::stackElementSize);
- break;
- case sub:
- __ movdbl(xmm1, xmm0);
- __ pop_d(xmm0);
- __ subsd(xmm0, xmm1);
- break;
- case mul:
- __ mulsd(xmm0, at_rsp());
- __ addptr(rsp, 2 * Interpreter::stackElementSize);
- break;
- case div:
- __ movdbl(xmm1, xmm0);
- __ pop_d(xmm0);
- __ divsd(xmm0, xmm1);
- break;
- case rem:
- __ movdbl(xmm1, xmm0);
- __ pop_d(xmm0);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
- break;
- default:
+ __ movdbl(xmm1, xmm0);
+ __ pop_d(xmm0);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
+#else
+ __ push_d(xmm0);
+ __ pop_d();
+ __ fld_d(at_rsp());
+ __ fremr(rax);
+ __ d2ieee();
+ __ pop(rax);
+ __ pop(rdx);
+ __ push_d();
+ __ pop_d(xmm0);
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ } else {
+#ifdef _LP64
ShouldNotReachHere();
- break;
- }
#else
- switch (op) {
+ switch (op) {
case add: __ fadd_d (at_rsp()); break;
case sub: __ fsubr_d(at_rsp()); break;
case mul: {
@@ -1543,12 +1573,13 @@
}
case rem: __ fld_d (at_rsp()); __ fremr(rax); break;
default : ShouldNotReachHere();
+ }
+ __ d2ieee();
+ // Pop double precision number from rsp.
+ __ pop(rax);
+ __ pop(rdx);
+#endif
}
- __ d2ieee();
- // Pop double precision number from rsp.
- __ pop(rax);
- __ pop(rdx);
-#endif
}
void TemplateTable::ineg() {
@@ -1562,7 +1593,6 @@
NOT_LP64(__ lneg(rdx, rax));
}
-#ifdef _LP64
// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
// Use the expression (adr)&(~0xF) to provide 128-bits aligned address
@@ -1577,26 +1607,30 @@
// Buffer for 128-bits masks used by SSE instructions.
static jlong float_signflip_pool[2*2];
static jlong double_signflip_pool[2*2];
-#endif
void TemplateTable::fneg() {
transition(ftos, ftos);
-#ifdef _LP64
- static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
- __ xorps(xmm0, ExternalAddress((address) float_signflip));
-#else
- __ fchs();
-#endif
+ if (UseSSE >= 1) {
+ static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
+ __ xorps(xmm0, ExternalAddress((address) float_signflip));
+ } else {
+ LP64_ONLY(ShouldNotReachHere());
+ NOT_LP64(__ fchs());
+ }
}
void TemplateTable::dneg() {
transition(dtos, dtos);
+ if (UseSSE >= 2) {
+ static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
+ __ xorpd(xmm0, ExternalAddress((address) double_signflip));
+ } else {
#ifdef _LP64
- static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
- __ xorpd(xmm0, ExternalAddress((address) double_signflip));
+ ShouldNotReachHere();
#else
- __ fchs();
+ __ fchs();
#endif
+ }
}
void TemplateTable::iinc() {
@@ -1798,18 +1832,26 @@
__ extend_sign(rdx, rax);
break;
case Bytecodes::_i2f:
- __ push(rax); // store int on tos
- __ fild_s(at_rsp()); // load int to ST0
- __ f2ieee(); // truncate to float size
- __ pop(rcx); // adjust rsp
+ if (UseSSE >= 1) {
+ __ cvtsi2ssl(xmm0, rax);
+ } else {
+ __ push(rax); // store int on tos
+ __ fild_s(at_rsp()); // load int to ST0
+ __ f2ieee(); // truncate to float size
+ __ pop(rcx); // adjust rsp
+ }
break;
case Bytecodes::_i2d:
+ if (UseSSE >= 2) {
+ __ cvtsi2sdl(xmm0, rax);
+ } else {
__ push(rax); // add one slot for d2ieee()
__ push(rax); // store int on tos
__ fild_s(at_rsp()); // load int to ST0
__ d2ieee(); // truncate to double size
__ pop(rcx); // adjust rsp
__ pop(rcx);
+ }
break;
case Bytecodes::_i2b:
__ shll(rax, 24); // truncate upper 24 bits
@@ -1829,50 +1871,102 @@
/* nothing to do */
break;
case Bytecodes::_l2f:
+ // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
+ // 64-bit long values to floats. On 32-bit platforms it is not possible
+ // to use that instruction with 64-bit operands, therefore the FPU is
+ // used to perform the conversion.
__ push(rdx); // store long on tos
__ push(rax);
__ fild_d(at_rsp()); // load long to ST0
__ f2ieee(); // truncate to float size
__ pop(rcx); // adjust rsp
__ pop(rcx);
+ if (UseSSE >= 1) {
+ __ push_f();
+ __ pop_f(xmm0);
+ }
break;
case Bytecodes::_l2d:
+ // On 32-bit platforms the FPU is used for conversion because on
+ // 32-bit platforms it is not possible to use the cvtsi2sdq
+ // instruction with 64-bit operands.
__ push(rdx); // store long on tos
__ push(rax);
__ fild_d(at_rsp()); // load long to ST0
__ d2ieee(); // truncate to double size
__ pop(rcx); // adjust rsp
__ pop(rcx);
+ if (UseSSE >= 2) {
+ __ push_d();
+ __ pop_d(xmm0);
+ }
break;
case Bytecodes::_f2i:
- __ push(rcx); // reserve space for argument
- __ fstp_s(at_rsp()); // pass float argument on stack
+ // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
+ // as it returns 0 for any NaN.
+ if (UseSSE >= 1) {
+ __ push_f(xmm0);
+ } else {
+ __ push(rcx); // reserve space for argument
+ __ fstp_s(at_rsp()); // pass float argument on stack
+ }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
break;
case Bytecodes::_f2l:
- __ push(rcx); // reserve space for argument
- __ fstp_s(at_rsp()); // pass float argument on stack
+ // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
+ // as it returns 0 for any NaN.
+ if (UseSSE >= 1) {
+ __ push_f(xmm0);
+ } else {
+ __ push(rcx); // reserve space for argument
+ __ fstp_s(at_rsp()); // pass float argument on stack
+ }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
break;
case Bytecodes::_f2d:
- /* nothing to do */
+ if (UseSSE < 1) {
+ /* nothing to do */
+ } else if (UseSSE == 1) {
+ __ push_f(xmm0);
+ __ pop_f();
+ } else { // UseSSE >= 2
+ __ cvtss2sd(xmm0, xmm0);
+ }
break;
case Bytecodes::_d2i:
- __ push(rcx); // reserve space for argument
- __ push(rcx);
- __ fstp_d(at_rsp()); // pass double argument on stack
+ if (UseSSE >= 2) {
+ __ push_d(xmm0);
+ } else {
+ __ push(rcx); // reserve space for argument
+ __ push(rcx);
+ __ fstp_d(at_rsp()); // pass double argument on stack
+ }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
break;
case Bytecodes::_d2l:
- __ push(rcx); // reserve space for argument
- __ push(rcx);
- __ fstp_d(at_rsp()); // pass double argument on stack
+ if (UseSSE >= 2) {
+ __ push_d(xmm0);
+ } else {
+ __ push(rcx); // reserve space for argument
+ __ push(rcx);
+ __ fstp_d(at_rsp()); // pass double argument on stack
+ }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
break;
case Bytecodes::_d2f:
- __ push(rcx); // reserve space for f2ieee()
- __ f2ieee(); // truncate to float size
- __ pop(rcx); // adjust rsp
+ if (UseSSE <= 1) {
+ __ push(rcx); // reserve space for f2ieee()
+ __ f2ieee(); // truncate to float size
+ __ pop(rcx); // adjust rsp
+ if (UseSSE == 1) {
+ // The cvtsd2ss instruction is not available if UseSSE==1, therefore
+ // the conversion is performed using the FPU in this case.
+ __ push_f();
+ __ pop_f(xmm0);
+ }
+ } else { // UseSSE >= 2
+ __ cvtsd2ss(xmm0, xmm0);
+ }
break;
default :
ShouldNotReachHere();
@@ -1901,42 +1995,47 @@
}
void TemplateTable::float_cmp(bool is_float, int unordered_result) {
-#ifdef _LP64
- Label done;
- if (is_float) {
- // XXX get rid of pop here, use ... reg, mem32
- __ pop_f(xmm1);
- __ ucomiss(xmm1, xmm0);
- } else {
- // XXX get rid of pop here, use ... reg, mem64
- __ pop_d(xmm1);
- __ ucomisd(xmm1, xmm0);
- }
- if (unordered_result < 0) {
- __ movl(rax, -1);
- __ jccb(Assembler::parity, done);
- __ jccb(Assembler::below, done);
- __ setb(Assembler::notEqual, rdx);
- __ movzbl(rax, rdx);
+ if ((is_float && UseSSE >= 1) ||
+ (!is_float && UseSSE >= 2)) {
+ Label done;
+ if (is_float) {
+ // XXX get rid of pop here, use ... reg, mem32
+ __ pop_f(xmm1);
+ __ ucomiss(xmm1, xmm0);
+ } else {
+ // XXX get rid of pop here, use ... reg, mem64
+ __ pop_d(xmm1);
+ __ ucomisd(xmm1, xmm0);
+ }
+ if (unordered_result < 0) {
+ __ movl(rax, -1);
+ __ jccb(Assembler::parity, done);
+ __ jccb(Assembler::below, done);
+ __ setb(Assembler::notEqual, rdx);
+ __ movzbl(rax, rdx);
+ } else {
+ __ movl(rax, 1);
+ __ jccb(Assembler::parity, done);
+ __ jccb(Assembler::above, done);
+ __ movl(rax, 0);
+ __ jccb(Assembler::equal, done);
+ __ decrementl(rax);
+ }
+ __ bind(done);
} else {
- __ movl(rax, 1);
- __ jccb(Assembler::parity, done);
- __ jccb(Assembler::above, done);
- __ movl(rax, 0);
- __ jccb(Assembler::equal, done);
- __ decrementl(rax);
- }
- __ bind(done);
+#ifdef _LP64
+ ShouldNotReachHere();
#else
- if (is_float) {
- __ fld_s(at_rsp());
- } else {
- __ fld_d(at_rsp());
- __ pop(rdx);
+ if (is_float) {
+ __ fld_s(at_rsp());
+ } else {
+ __ fld_d(at_rsp());
+ __ pop(rdx);
+ }
+ __ pop(rcx);
+ __ fcmp2int(rax, unordered_result < 0);
+#endif // _LP64
}
- __ pop(rcx);
- __ fcmp2int(rax, unordered_result < 0);
-#endif
}
void TemplateTable::branch(bool is_jsr, bool is_wide) {
@@ -2747,8 +2846,7 @@
__ jcc(Assembler::notEqual, notFloat);
// ftos
- LP64_ONLY(__ movflt(xmm0, field));
- NOT_LP64(__ fld_s(field));
+ __ load_float(field);
__ push(ftos);
// Rewrite bytecode to be faster
if (!is_static && rc == may_rewrite) {
@@ -2762,8 +2860,7 @@
__ jcc(Assembler::notEqual, notDouble);
#endif
// dtos
- LP64_ONLY(__ movdbl(xmm0, field));
- NOT_LP64(__ fld_d(field));
+ __ load_double(field);
__ push(dtos);
// Rewrite bytecode to be faster
if (!is_static && rc == may_rewrite) {
@@ -3045,8 +3142,7 @@
{
__ pop(ftos);
if (!is_static) pop_and_check_object(obj);
- NOT_LP64( __ fstp_s(field);)
- LP64_ONLY( __ movflt(field, xmm0);)
+ __ store_float(field);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
}
@@ -3063,8 +3159,7 @@
{
__ pop(dtos);
if (!is_static) pop_and_check_object(obj);
- NOT_LP64( __ fstp_d(field);)
- LP64_ONLY( __ movdbl(field, xmm0);)
+ __ store_double(field);
if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
}
@@ -3122,8 +3217,8 @@
case Bytecodes::_fast_sputfield: // fall through
case Bytecodes::_fast_cputfield: // fall through
case Bytecodes::_fast_iputfield: __ push_i(rax); break;
- case Bytecodes::_fast_dputfield: __ push_d(); break;
- case Bytecodes::_fast_fputfield: __ push_f(); break;
+ case Bytecodes::_fast_dputfield: __ push(dtos); break;
+ case Bytecodes::_fast_fputfield: __ push(ftos); break;
case Bytecodes::_fast_lputfield: __ push_l(rax); break;
default:
@@ -3146,8 +3241,8 @@
case Bytecodes::_fast_sputfield: // fall through
case Bytecodes::_fast_cputfield: // fall through
case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
- case Bytecodes::_fast_dputfield: __ pop_d(); break;
- case Bytecodes::_fast_fputfield: __ pop_f(); break;
+ case Bytecodes::_fast_dputfield: __ pop(dtos); break;
+ case Bytecodes::_fast_fputfield: __ pop(ftos); break;
case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
}
__ bind(L2);
@@ -3211,12 +3306,10 @@
__ movw(field, rax);
break;
case Bytecodes::_fast_fputfield:
- NOT_LP64( __ fstp_s(field); )
- LP64_ONLY( __ movflt(field, xmm0);)
+ __ store_float(field);
break;
case Bytecodes::_fast_dputfield:
- NOT_LP64( __ fstp_d(field); )
- LP64_ONLY( __ movdbl(field, xmm0);)
+ __ store_double(field);
break;
default:
ShouldNotReachHere();
@@ -3301,12 +3394,10 @@
__ load_unsigned_short(rax, field);
break;
case Bytecodes::_fast_fgetfield:
- LP64_ONLY(__ movflt(xmm0, field));
- NOT_LP64(__ fld_s(field));
+ __ load_float(field);
break;
case Bytecodes::_fast_dgetfield:
- LP64_ONLY(__ movdbl(xmm0, field));
- NOT_LP64(__ fld_d(field));
+ __ load_double(field);
break;
default:
ShouldNotReachHere();
@@ -3346,8 +3437,7 @@
__ verify_oop(rax);
break;
case ftos:
- LP64_ONLY(__ movflt(xmm0, field));
- NOT_LP64(__ fld_s(field));
+ __ load_float(field);
break;
default:
ShouldNotReachHere();
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -1399,6 +1399,28 @@
// do the compilation
if (method->is_native()) {
if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) {
+ // The following native methods:
+ //
+ // java.lang.Float.intBitsToFloat
+ // java.lang.Float.floatToRawIntBits
+ // java.lang.Double.longBitsToDouble
+ // java.lang.Double.doubleToRawLongBits
+ //
+ // are called through the interpreter even if interpreter native stubs
+ // are not preferred (i.e., calling through adapter handlers is preferred).
+ // The reason is that on x86_32 signaling NaNs (sNaNs) are not preserved
+ // if the version of the methods from the native libraries is called.
+ // As the interpreter and the C2-intrinsified versions of the methods preserve
+ // sNaNs, that would result in inconsistent handling of sNaNs.
+ if ((UseSSE >= 1 &&
+ (method->intrinsic_id() == vmIntrinsics::_intBitsToFloat ||
+ method->intrinsic_id() == vmIntrinsics::_floatToRawIntBits)) ||
+ (UseSSE >= 2 &&
+ (method->intrinsic_id() == vmIntrinsics::_longBitsToDouble ||
+ method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) {
+ return NULL;
+ }
+
// To properly handle the appendix argument for out-of-line calls we are using a small trampoline that
// pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime).
//
--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Wed Aug 19 08:55:18 2015 +0200
@@ -90,6 +90,10 @@
java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update()
java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes()
java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
+ java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat()
+ java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits()
+ java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble()
+ java_lang_Double_doubleToRawLongBits, // implementation of java.lang.Double.doubleToRawLongBits()
number_of_method_entries,
invalid = -1
};
--- a/hotspot/src/share/vm/interpreter/interpreter.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/share/vm/interpreter/interpreter.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -234,7 +234,15 @@
case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
}
}
-#endif
+
+ switch(m->intrinsic_id()) {
+ case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat;
+ case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits;
+ case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble;
+ case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits;
+ }
+
+#endif // CC_INTERP
// Native method?
// Note: This test must come _before_ the test for intrinsic
@@ -559,6 +567,25 @@
: // fall thru
case Interpreter::java_util_zip_CRC32_updateByteBuffer
: entry_point = generate_CRC32_updateBytes_entry(kind); break;
+#if defined(TARGET_ARCH_x86) && !defined(_LP64)
+ // On x86_32 platforms, a special entry is generated for the following four methods.
+ // On other platforms the normal entry is used to enter these methods.
+ case Interpreter::java_lang_Float_intBitsToFloat
+ : entry_point = generate_Float_intBitsToFloat_entry(); break;
+ case Interpreter::java_lang_Float_floatToRawIntBits
+ : entry_point = generate_Float_floatToRawIntBits_entry(); break;
+ case Interpreter::java_lang_Double_longBitsToDouble
+ : entry_point = generate_Double_longBitsToDouble_entry(); break;
+ case Interpreter::java_lang_Double_doubleToRawLongBits
+ : entry_point = generate_Double_doubleToRawLongBits_entry(); break;
+#else
+ case Interpreter::java_lang_Float_intBitsToFloat:
+ case Interpreter::java_lang_Float_floatToRawIntBits:
+ case Interpreter::java_lang_Double_longBitsToDouble:
+ case Interpreter::java_lang_Double_doubleToRawLongBits:
+ entry_point = generate_native_entry(false);
+ break;
+#endif // defined(TARGET_ARCH_x86) && !defined(_LP64)
#endif // CC_INTERP
default:
fatal(err_msg("unexpected method kind: %d", kind));
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Fri Aug 14 00:28:45 2015 +0200
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Wed Aug 19 08:55:18 2015 +0200
@@ -397,34 +397,39 @@
// all non-native method kinds
method_entry(zerolocals)
- method_entry(zerolocals_synchronized)
- method_entry(empty)
- method_entry(accessor)
- method_entry(abstract)
- method_entry(java_lang_math_sin )
- method_entry(java_lang_math_cos )
- method_entry(java_lang_math_tan )
- method_entry(java_lang_math_abs )
- method_entry(java_lang_math_sqrt )
- method_entry(java_lang_math_log )
- method_entry(java_lang_math_log10)
- method_entry(java_lang_math_exp )
- method_entry(java_lang_math_pow )
- method_entry(java_lang_ref_reference_get)
+ method_entry(zerolocals_synchronized)
+ method_entry(empty)
+ method_entry(accessor)
+ method_entry(abstract)
+ method_entry(java_lang_math_sin )
+ method_entry(java_lang_math_cos )
+ method_entry(java_lang_math_tan )
+ method_entry(java_lang_math_abs )
+ method_entry(java_lang_math_sqrt )
+ method_entry(java_lang_math_log )
+ method_entry(java_lang_math_log10)
+ method_entry(java_lang_math_exp )
+ method_entry(java_lang_math_pow )
+ method_entry(java_lang_ref_reference_get)
- if (UseCRC32Intrinsics) {
- method_entry(java_util_zip_CRC32_update)
- method_entry(java_util_zip_CRC32_updateBytes)
- method_entry(java_util_zip_CRC32_updateByteBuffer)
- }
+ if (UseCRC32Intrinsics) {
+ method_entry(java_util_zip_CRC32_update)
+ method_entry(java_util_zip_CRC32_updateBytes)
+ method_entry(java_util_zip_CRC32_updateByteBuffer)
+ }
+
+ method_entry(java_lang_Float_intBitsToFloat);
+ method_entry(java_lang_Float_floatToRawIntBits);
+ method_entry(java_lang_Double_longBitsToDouble);
+ method_entry(java_lang_Double_doubleToRawLongBits);
initialize_method_handle_entries();
// all native method kinds (must be one contiguous block)
Interpreter::_native_entry_begin = Interpreter::code()->code_end();
method_entry(native)
- method_entry(native_synchronized)
- Interpreter::_native_entry_end = Interpreter::code()->code_end();
+ method_entry(native_synchronized)
+ Interpreter::_native_entry_end = Interpreter::code()->code_end();
#undef method_entry
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/floatingpoint/NaNTest.java Wed Aug 19 08:55:18 2015 +0200
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/**
+ * @test
+ * @bug 8076373
+ * @summary Verify that signaling NaNs are preserved.
+ * @run main NaNTest
+ */
+public class NaNTest {
+ static void testFloat() {
+ int originalValue = 0x7f800001;
+ int readBackValue = Float.floatToRawIntBits(Float.intBitsToFloat(originalValue));
+ if (originalValue != readBackValue) {
+ String errorMessage = String.format("Original and read back float values mismatch\n0x%X 0x%X\n",
+ originalValue,
+ readBackValue);
+ throw new RuntimeException(errorMessage);
+ } else {
+ System.out.printf("Written and read back float values match\n0x%X 0x%X\n",
+ originalValue,
+ readBackValue);
+ }
+ }
+
+ static void testDouble() {
+ long originalValue = 0xFFF0000000000001L;
+ long readBackValue = Double.doubleToRawLongBits(Double.longBitsToDouble(originalValue));
+ if (originalValue != readBackValue) {
+ String errorMessage = String.format("Original and read back double values mismatch\n0x%X 0x%X\n",
+ originalValue,
+ readBackValue);
+ throw new RuntimeException(errorMessage);
+ } else {
+ System.out.printf("Written and read back double values match\n0x%X 0x%X\n",
+ originalValue,
+ readBackValue);
+ }
+
+ }
+
+ public static void main(String args[]) {
+ System.out.println("### NanTest started");
+
+ testFloat();
+ testDouble();
+
+ System.out.println("### NanTest ended");
+ }
+}