jdk-sandbox: comparison hotspot/src/cpu/x86/vm/templateTable

equal deleted inserted replaced

-:8815f2d1447f
+:01e2f5e916c7
 void TemplateTable::fconst(int value) {
 transition(vtos, ftos);
+if (UseSSE >= 1) {
+static float one = 1.0f, two = 2.0f;
+switch (value) {
+case 0:
+__ xorps(xmm0, xmm0);
+break;
+case 1:
+__ movflt(xmm0, ExternalAddress((address) &one));
+break;
+case 2:
+__ movflt(xmm0, ExternalAddress((address) &two));
+break;
+default:
+ShouldNotReachHere();
+break;
+}
+} else {
 #ifdef _LP64
-static float one = 1.0f, two = 2.0f;
-switch (value) {
-case 0:
-__ xorps(xmm0, xmm0);
-break;
-case 1:
-__ movflt(xmm0, ExternalAddress((address) &one));
-break;
-case 2:
-__ movflt(xmm0, ExternalAddress((address) &two));
-break;
-default:
 ShouldNotReachHere();
-break;
-}
 #else
 if (value == 0) { __ fldz();
 } else if (value == 1) { __ fld1();
 } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
 } else                 { ShouldNotReachHere();
 }
-#endif
+#endif // _LP64
+}
 }
 void TemplateTable::dconst(int value) {
 transition(vtos, dtos);
+if (UseSSE >= 2) {
+static double one = 1.0;
+switch (value) {
+case 0:
+__ xorpd(xmm0, xmm0);
+break;
+case 1:
+__ movdbl(xmm0, ExternalAddress((address) &one));
+break;
+default:
+ShouldNotReachHere();
+break;
+}
+} else {
 #ifdef _LP64
-static double one = 1.0;
-switch (value) {
-case 0:
-__ xorpd(xmm0, xmm0);
-break;
-case 1:
-__ movdbl(xmm0, ExternalAddress((address) &one));
-break;
-default:
 ShouldNotReachHere();
-break;
-}
 #else
 if (value == 0) { __ fldz();
 } else if (value == 1) { __ fld1();
 } else                 { ShouldNotReachHere();
 }
 #endif
+}
 }
 void TemplateTable::bipush() {
 transition(vtos, itos);
 __ load_signed_byte(rax, at_bcp(1));
 __ bind(notClass);
 __ cmpl(rdx, JVM_CONSTANT_Float);
 __ jccb(Assembler::notEqual, notFloat);
 // ftos
-LP64_ONLY(__ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
+__ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
-NOT_LP64(__ fld_s(    Address(rcx, rbx, Address::times_ptr, base_offset)));
 __ push(ftos);
 __ jmp(Done);
 __ bind(notFloat);
 #ifdef ASSERT
 __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
 JVM_CONSTANT_Double);
 __ jccb(Assembler::notEqual, Long);
 // dtos
-LP64_ONLY(__ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
+__ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
-NOT_LP64(__ fld_d(    Address(rcx, rbx, Address::times_ptr, base_offset)));
 __ push(dtos);
 __ jmpb(Done);
 __ bind(Long);
 }
 void TemplateTable::fload() {
 transition(vtos, ftos);
 locals_index(rbx);
-LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
+__ load_float(faddress(rbx));
-NOT_LP64(__ fld_s(faddress(rbx)));
 }
 void TemplateTable::dload() {
 transition(vtos, dtos);
 locals_index(rbx);
-LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
+__ load_double(daddress(rbx));
-NOT_LP64(__ fld_d(daddress(rbx)));
 }
 void TemplateTable::aload() {
 transition(vtos, atos);
 locals_index(rbx);
 }
 void TemplateTable::wide_fload() {
 transition(vtos, ftos);
 locals_index_wide(rbx);
-LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
+__ load_float(faddress(rbx));
-NOT_LP64(__ fld_s(faddress(rbx)));
 }
 void TemplateTable::wide_dload() {
 transition(vtos, dtos);
 locals_index_wide(rbx);
-LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
+__ load_double(daddress(rbx));
-NOT_LP64(__ fld_d(daddress(rbx)));
 }
 void TemplateTable::wide_aload() {
 transition(vtos, atos);
 locals_index_wide(rbx);
 void TemplateTable::faload() {
 transition(itos, ftos);
 // rax: index
 // rdx: array
 index_check(rdx, rax); // kills rbx
-LP64_ONLY(__ movflt(xmm0, Address(rdx, rax,
+__ load_float(Address(rdx, rax,
 Address::times_4,
-arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
+arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
-NOT_LP64(__ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
 }
 void TemplateTable::daload() {
 transition(itos, dtos);
 // rax: index
 // rdx: array
 index_check(rdx, rax); // kills rbx
-LP64_ONLY(__ movdbl(xmm0, Address(rdx, rax,
+__ load_double(Address(rdx, rax,
 Address::times_8,
-arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
+arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
-NOT_LP64(__ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
 }
 void TemplateTable::aaload() {
 transition(itos, atos);
 // rax: index
 NOT_LP64(__ movptr(rdx, haddress(n)));
 }
 void TemplateTable::fload(int n) {
 transition(vtos, ftos);
-LP64_ONLY(__ movflt(xmm0, faddress(n)));
+__ load_float(faddress(n));
-NOT_LP64(__ fld_s(faddress(n)));
 }
 void TemplateTable::dload(int n) {
 transition(vtos, dtos);
-LP64_ONLY(__ movdbl(xmm0, daddress(n)));
+__ load_double(daddress(n));
-NOT_LP64(__ fld_d(daddress(n)));
 }
 void TemplateTable::aload(int n) {
 transition(vtos, atos);
 __ movptr(rax, aaddress(n));
 }
 void TemplateTable::fstore() {
 transition(ftos, vtos);
 locals_index(rbx);
-LP64_ONLY(__ movflt(faddress(rbx), xmm0));
+__ store_float(faddress(rbx));
-NOT_LP64(__ fstp_s(faddress(rbx)));
 }
 void TemplateTable::dstore() {
 transition(dtos, vtos);
 locals_index(rbx);
-LP64_ONLY(__ movdbl(daddress(rbx), xmm0));
+__ store_double(daddress(rbx));
-NOT_LP64(__ fstp_d(daddress(rbx)));
 }
 void TemplateTable::astore() {
 transition(vtos, vtos);
 __ pop_ptr(rax);
 }
 void TemplateTable::wide_fstore() {
 #ifdef _LP64
 transition(vtos, vtos);
-__ pop_f();
+__ pop_f(xmm0);
 locals_index_wide(rbx);
 __ movflt(faddress(rbx), xmm0);
 #else
 wide_istore();
 #endif
 }
 void TemplateTable::wide_dstore() {
 #ifdef _LP64
 transition(vtos, vtos);
-__ pop_d();
+__ pop_d(xmm0);
 locals_index_wide(rbx);
 __ movdbl(daddress(rbx), xmm0);
 #else
 wide_lstore();
 #endif
 void TemplateTable::fastore() {
 transition(ftos, vtos);
 __ pop_i(rbx);
-// xmm0: value
+// value is in UseSSE >= 1 ? xmm0 : ST(0)
 // rbx:  index
 // rdx:  array
 index_check(rdx, rbx); // prefer index in rbx
-LP64_ONLY(__ movflt(Address(rdx, rbx,
+__ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
-Address::times_4,
-arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
-xmm0));
-NOT_LP64(__ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
 }
 void TemplateTable::dastore() {
 transition(dtos, vtos);
 __ pop_i(rbx);
-// xmm0: value
+// value is in UseSSE >= 2 ? xmm0 : ST(0)
 // rbx:  index
 // rdx:  array
 index_check(rdx, rbx); // prefer index in rbx
-LP64_ONLY(__ movdbl(Address(rdx, rbx,
+__ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
-Address::times_8,
-arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
-xmm0));
-NOT_LP64(__ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
 }
 void TemplateTable::aastore() {
 Label is_null, ok_is_subtype, done;
 transition(vtos, vtos);
 NOT_LP64(__ movptr(haddress(n), rdx));
 }
 void TemplateTable::fstore(int n) {
 transition(ftos, vtos);
-LP64_ONLY(__ movflt(faddress(n), xmm0));
+__ store_float(faddress(n));
-NOT_LP64(__ fstp_s(faddress(n)));
 }
 void TemplateTable::dstore(int n) {
 transition(dtos, vtos);
-LP64_ONLY(__ movdbl(daddress(n), xmm0));
+__ store_double(daddress(n));
-NOT_LP64(__ fstp_d(daddress(n)));
 }
 void TemplateTable::astore(int n) {
 transition(vtos, vtos);
 #endif
 }
 void TemplateTable::fop2(Operation op) {
 transition(ftos, ftos);
+if (UseSSE >= 1) {
+switch (op) {
+case add:
+__ addss(xmm0, at_rsp());
+__ addptr(rsp, Interpreter::stackElementSize);
+break;
+case sub:
+__ movflt(xmm1, xmm0);
+__ pop_f(xmm0);
+__ subss(xmm0, xmm1);
+break;
+case mul:
+__ mulss(xmm0, at_rsp());
+__ addptr(rsp, Interpreter::stackElementSize);
+break;
+case div:
+__ movflt(xmm1, xmm0);
+__ pop_f(xmm0);
+__ divss(xmm0, xmm1);
+break;
+case rem:
+// On x86_64 platforms the SharedRuntime::frem method is called to perform the
+// modulo operation. The frem method calls the function
+// double fmod(double x, double y) in math.h. The documentation of fmod states:
+// "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
+// (signalling or quiet) is returned.
+//
+// On x86_32 platforms the FPU is used to perform the modulo operation. The
+// reason is that on 32-bit Windows the sign of modulo operations diverges from
+// what is considered the standard (e.g., -0.0f % -3.14f is 0.0f and not -0.0f).
+// The fprem instruction used on x86_32 is functionally equivalent to
+// SharedRuntime::frem in that it returns a NaN.
 #ifdef _LP64
-switch (op) {
+__ movflt(xmm1, xmm0);
-case add:
+__ pop_f(xmm0);
-__ addss(xmm0, at_rsp());
+__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
-__ addptr(rsp, Interpreter::stackElementSize);
+#else
-break;
+__ push_f(xmm0);
-case sub:
+__ pop_f();
-__ movflt(xmm1, xmm0);
+__ fld_s(at_rsp());
-__ pop_f(xmm0);
+__ fremr(rax);
-__ subss(xmm0, xmm1);
+__ f2ieee();
-break;
+__ pop(rax);  // pop second operand off the stack
-case mul:
+__ push_f();
-__ mulss(xmm0, at_rsp());
+__ pop_f(xmm0);
-__ addptr(rsp, Interpreter::stackElementSize);
+#endif
 break;
-case div:
+default:
-__ movflt(xmm1, xmm0);
+ShouldNotReachHere();
-__ pop_f(xmm0);
+break;
-__ divss(xmm0, xmm1);
+}
-break;
+} else {
-case rem:
+#ifdef _LP64
-__ movflt(xmm1, xmm0);
-__ pop_f(xmm0);
-__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
-break;
-default:
 ShouldNotReachHere();
-break;
-}
 #else
 switch (op) {
 case add: __ fadd_s (at_rsp());                break;
 case sub: __ fsubr_s(at_rsp());                break;
 case mul: __ fmul_s (at_rsp());                break;
 case div: __ fdivr_s(at_rsp());                break;
 case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
 default : ShouldNotReachHere();
 }
 __ f2ieee();
-__ pop(rax);  // pop float thing off
+__ pop(rax);  // pop second operand off the stack
-#endif
+#endif // _LP64
+}
 }
 void TemplateTable::dop2(Operation op) {
 transition(dtos, dtos);
+if (UseSSE >= 2) {
+switch (op) {
+case add:
+__ addsd(xmm0, at_rsp());
+__ addptr(rsp, 2 * Interpreter::stackElementSize);
+break;
+case sub:
+__ movdbl(xmm1, xmm0);
+__ pop_d(xmm0);
+__ subsd(xmm0, xmm1);
+break;
+case mul:
+__ mulsd(xmm0, at_rsp());
+__ addptr(rsp, 2 * Interpreter::stackElementSize);
+break;
+case div:
+__ movdbl(xmm1, xmm0);
+__ pop_d(xmm0);
+__ divsd(xmm0, xmm1);
+break;
+case rem:
+// Similar to fop2(), the modulo operation is performed using the
+// SharedRuntime::drem method (on x86_64 platforms) or using the
+// FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
 #ifdef _LP64
-switch (op) {
+__ movdbl(xmm1, xmm0);
-case add:
+__ pop_d(xmm0);
-__ addsd(xmm0, at_rsp());
+__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
-__ addptr(rsp, 2 * Interpreter::stackElementSize);
+#else
-break;
+__ push_d(xmm0);
-case sub:
+__ pop_d();
-__ movdbl(xmm1, xmm0);
+__ fld_d(at_rsp());
-__ pop_d(xmm0);
+__ fremr(rax);
-__ subsd(xmm0, xmm1);
+__ d2ieee();
-break;
+__ pop(rax);
-case mul:
+__ pop(rdx);
-__ mulsd(xmm0, at_rsp());
+__ push_d();
-__ addptr(rsp, 2 * Interpreter::stackElementSize);
+__ pop_d(xmm0);
-break;
+#endif
-case div:
+break;
-__ movdbl(xmm1, xmm0);
+default:
-__ pop_d(xmm0);
+ShouldNotReachHere();
-__ divsd(xmm0, xmm1);
+break;
-break;
+}
-case rem:
+} else {
-__ movdbl(xmm1, xmm0);
+#ifdef _LP64
-__ pop_d(xmm0);
-__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
-break;
-default:
 ShouldNotReachHere();
-break;
-}
 #else
 switch (op) {
 case add: __ fadd_d (at_rsp());                break;
 case sub: __ fsubr_d(at_rsp());                break;
 case mul: {
 Label L_strict;
 Label L_join;
 __ bind(L_join);
 break;
 }
 case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
 default : ShouldNotReachHere();
 }
 __ d2ieee();
 // Pop double precision number from rsp.
 __ pop(rax);
 __ pop(rdx);
 #endif
+}
 }
 void TemplateTable::ineg() {
 transition(itos, itos);
 __ negl(rax);
 transition(ltos, ltos);
 LP64_ONLY(__ negq(rax));
 NOT_LP64(__ lneg(rdx, rax));
 }
-#ifdef _LP64
 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 // of 128-bits operands for SSE instructions.
 jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
 }
 // Buffer for 128-bits masks used by SSE instructions.
 static jlong float_signflip_pool[2*2];
 static jlong double_signflip_pool[2*2];
-#endif
 void TemplateTable::fneg() {
 transition(ftos, ftos);
-#ifdef _LP64
+if (UseSSE >= 1) {
 static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
 __ xorps(xmm0, ExternalAddress((address) float_signflip));
-#else
+} else {
-__ fchs();
+LP64_ONLY(ShouldNotReachHere());
-#endif
+NOT_LP64(__ fchs());
+}
 }
 void TemplateTable::dneg() {
 transition(dtos, dtos);
+if (UseSSE >= 2) {
+static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
+__ xorpd(xmm0, ExternalAddress((address) double_signflip));
+} else {
 #ifdef _LP64
-static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
+ShouldNotReachHere();
-__ xorpd(xmm0, ExternalAddress((address) double_signflip));
 #else
 __ fchs();
 #endif
+}
 }
 void TemplateTable::iinc() {
 transition(vtos, vtos);
 __ load_signed_byte(rdx, at_bcp(2)); // get constant
 switch (bytecode()) {
 case Bytecodes::_i2l:
 __ extend_sign(rdx, rax);
 break;
 case Bytecodes::_i2f:
-__ push(rax);          // store int on tos
+if (UseSSE >= 1) {
-__ fild_s(at_rsp());   // load int to ST0
+__ cvtsi2ssl(xmm0, rax);
-__ f2ieee();           // truncate to float size
+} else {
-__ pop(rcx);           // adjust rsp
+__ push(rax);          // store int on tos
+__ fild_s(at_rsp());   // load int to ST0
+__ f2ieee();           // truncate to float size
+__ pop(rcx);           // adjust rsp
+}
 break;
 case Bytecodes::_i2d:
+if (UseSSE >= 2) {
+__ cvtsi2sdl(xmm0, rax);
+} else {
 __ push(rax);          // add one slot for d2ieee()
 __ push(rax);          // store int on tos
 __ fild_s(at_rsp());   // load int to ST0
 __ d2ieee();           // truncate to double size
 __ pop(rcx);           // adjust rsp
 __ pop(rcx);
+}
 break;
 case Bytecodes::_i2b:
 __ shll(rax, 24);      // truncate upper 24 bits
 __ sarl(rax, 24);      // and sign-extend byte
 LP64_ONLY(__ movsbl(rax, rax));
 break;
 case Bytecodes::_l2i:
 /* nothing to do */
 break;
 case Bytecodes::_l2f:
+// On 64-bit platforms, the cvtsi2ssq instruction is used to convert
+// 64-bit long values to floats. On 32-bit platforms it is not possible
+// to use that instruction with 64-bit operands, therefore the FPU is
+// used to perform the conversion.
 __ push(rdx);          // store long on tos
 __ push(rax);
 __ fild_d(at_rsp());   // load long to ST0
 __ f2ieee();           // truncate to float size
 __ pop(rcx);           // adjust rsp
 __ pop(rcx);
+if (UseSSE >= 1) {
+__ push_f();
+__ pop_f(xmm0);
+}
 break;
 case Bytecodes::_l2d:
+// On 32-bit platforms the FPU is used for conversion because on
+// 32-bit platforms it is not possible to use the cvtsi2sdq
+// instruction with 64-bit operands.
 __ push(rdx);          // store long on tos
 __ push(rax);
 __ fild_d(at_rsp());   // load long to ST0
 __ d2ieee();           // truncate to double size
 __ pop(rcx);           // adjust rsp
 __ pop(rcx);
+if (UseSSE >= 2) {
+__ push_d();
+__ pop_d(xmm0);
+}
 break;
 case Bytecodes::_f2i:
-__ push(rcx);          // reserve space for argument
+// SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
-__ fstp_s(at_rsp());   // pass float argument on stack
+// as it returns 0 for any NaN.
+if (UseSSE >= 1) {
+__ push_f(xmm0);
+} else {
+__ push(rcx);          // reserve space for argument
+__ fstp_s(at_rsp());   // pass float argument on stack
+}
 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
 break;
 case Bytecodes::_f2l:
-__ push(rcx);          // reserve space for argument
+// SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
-__ fstp_s(at_rsp());   // pass float argument on stack
+// as it returns 0 for any NaN.
+if (UseSSE >= 1) {
+__ push_f(xmm0);
+} else {
+__ push(rcx);          // reserve space for argument
+__ fstp_s(at_rsp());   // pass float argument on stack
+}
 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
 break;
 case Bytecodes::_f2d:
-/* nothing to do */
+if (UseSSE < 1) {
+/* nothing to do */
+} else if (UseSSE == 1) {
+__ push_f(xmm0);
+__ pop_f();
+} else { // UseSSE >= 2
+__ cvtss2sd(xmm0, xmm0);
+}
 break;
 case Bytecodes::_d2i:
-__ push(rcx);          // reserve space for argument
+if (UseSSE >= 2) {
-__ push(rcx);
+__ push_d(xmm0);
-__ fstp_d(at_rsp());   // pass double argument on stack
+} else {
+__ push(rcx);          // reserve space for argument
+__ push(rcx);
+__ fstp_d(at_rsp());   // pass double argument on stack
+}
 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
 break;
 case Bytecodes::_d2l:
-__ push(rcx);          // reserve space for argument
+if (UseSSE >= 2) {
-__ push(rcx);
+__ push_d(xmm0);
-__ fstp_d(at_rsp());   // pass double argument on stack
+} else {
+__ push(rcx);          // reserve space for argument
+__ push(rcx);
+__ fstp_d(at_rsp());   // pass double argument on stack
+}
 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
 break;
 case Bytecodes::_d2f:
-__ push(rcx);          // reserve space for f2ieee()
+if (UseSSE <= 1) {
-__ f2ieee();           // truncate to float size
+__ push(rcx);          // reserve space for f2ieee()
-__ pop(rcx);           // adjust rsp
+__ f2ieee();           // truncate to float size
+__ pop(rcx);           // adjust rsp
+if (UseSSE == 1) {
+// The cvtsd2ss instruction is not available if UseSSE==1, therefore
+// the conversion is performed using the FPU in this case.
+__ push_f();
+__ pop_f(xmm0);
+}
+} else { // UseSSE >= 2
+__ cvtsd2ss(xmm0, xmm0);
+}
 break;
 default             :
 ShouldNotReachHere();
 }
 #endif
 __ mov(rax, rcx);
 #endif
 }
 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+if ((is_float && UseSSE >= 1) ||
+(!is_float && UseSSE >= 2)) {
+Label done;
+if (is_float) {
+// XXX get rid of pop here, use ... reg, mem32
+__ pop_f(xmm1);
+__ ucomiss(xmm1, xmm0);
+} else {
+// XXX get rid of pop here, use ... reg, mem64
+__ pop_d(xmm1);
+__ ucomisd(xmm1, xmm0);
+}
+if (unordered_result < 0) {
+__ movl(rax, -1);
+__ jccb(Assembler::parity, done);
+__ jccb(Assembler::below, done);
+__ setb(Assembler::notEqual, rdx);
+__ movzbl(rax, rdx);
+} else {
+__ movl(rax, 1);
+__ jccb(Assembler::parity, done);
+__ jccb(Assembler::above, done);
+__ movl(rax, 0);
+__ jccb(Assembler::equal, done);
+__ decrementl(rax);
+}
+__ bind(done);
+} else {
 #ifdef _LP64
-Label done;
+ShouldNotReachHere();
-if (is_float) {
-// XXX get rid of pop here, use ... reg, mem32
-__ pop_f(xmm1);
-__ ucomiss(xmm1, xmm0);
-} else {
-// XXX get rid of pop here, use ... reg, mem64
-__ pop_d(xmm1);
-__ ucomisd(xmm1, xmm0);
-}
-if (unordered_result < 0) {
-__ movl(rax, -1);
-__ jccb(Assembler::parity, done);
-__ jccb(Assembler::below, done);
-__ setb(Assembler::notEqual, rdx);
-__ movzbl(rax, rdx);
-} else {
-__ movl(rax, 1);
-__ jccb(Assembler::parity, done);
-__ jccb(Assembler::above, done);
-__ movl(rax, 0);
-__ jccb(Assembler::equal, done);
-__ decrementl(rax);
-}
-__ bind(done);
 #else
 if (is_float) {
 __ fld_s(at_rsp());
 } else {
 __ fld_d(at_rsp());
 __ pop(rdx);
 }
 __ pop(rcx);
 __ fcmp2int(rax, unordered_result < 0);
-#endif
+#endif // _LP64
+}
 }
 void TemplateTable::branch(bool is_jsr, bool is_wide) {
 __ get_method(rcx); // rcx holds method
 __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
 __ bind(notLong);
 __ cmpl(flags, ftos);
 __ jcc(Assembler::notEqual, notFloat);
 // ftos
-LP64_ONLY(__ movflt(xmm0, field));
+__ load_float(field);
-NOT_LP64(__ fld_s(field));
 __ push(ftos);
 // Rewrite bytecode to be faster
 if (!is_static && rc == may_rewrite) {
 patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
 }
 #ifdef ASSERT
 __ cmpl(flags, dtos);
 __ jcc(Assembler::notEqual, notDouble);
 #endif
 // dtos
-LP64_ONLY(__ movdbl(xmm0, field));
+__ load_double(field);
-NOT_LP64(__ fld_d(field));
 __ push(dtos);
 // Rewrite bytecode to be faster
 if (!is_static && rc == may_rewrite) {
 patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
 }
 // ftos
 {
 __ pop(ftos);
 if (!is_static) pop_and_check_object(obj);
-NOT_LP64( __ fstp_s(field);)
+__ store_float(field);
-LP64_ONLY( __ movflt(field, xmm0);)
 if (!is_static && rc == may_rewrite) {
 patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
 }
 __ jmp(Done);
 }
 // dtos
 {
 __ pop(dtos);
 if (!is_static) pop_and_check_object(obj);
-NOT_LP64( __ fstp_d(field);)
+__ store_double(field);
-LP64_ONLY( __ movdbl(field, xmm0);)
 if (!is_static && rc == may_rewrite) {
 patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
 }
 }
 case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
 case Bytecodes::_fast_bputfield: // fall through
 case Bytecodes::_fast_sputfield: // fall through
 case Bytecodes::_fast_cputfield: // fall through
 case Bytecodes::_fast_iputfield: __ push_i(rax); break;
-case Bytecodes::_fast_dputfield: __ push_d(); break;
+case Bytecodes::_fast_dputfield: __ push(dtos); break;
-case Bytecodes::_fast_fputfield: __ push_f(); break;
+case Bytecodes::_fast_fputfield: __ push(ftos); break;
 case Bytecodes::_fast_lputfield: __ push_l(rax); break;
 default:
 ShouldNotReachHere();
 }
 case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
 case Bytecodes::_fast_bputfield: // fall through
 case Bytecodes::_fast_sputfield: // fall through
 case Bytecodes::_fast_cputfield: // fall through
 case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
-case Bytecodes::_fast_dputfield: __ pop_d(); break;
+case Bytecodes::_fast_dputfield: __ pop(dtos); break;
-case Bytecodes::_fast_fputfield: __ pop_f(); break;
+case Bytecodes::_fast_fputfield: __ pop(ftos); break;
 case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
 }
 __ bind(L2);
 }
 }
 // fall through
 case Bytecodes::_fast_cputfield:
 __ movw(field, rax);
 break;
 case Bytecodes::_fast_fputfield:
-NOT_LP64( __ fstp_s(field); )
+__ store_float(field);
-LP64_ONLY( __ movflt(field, xmm0);)
 break;
 case Bytecodes::_fast_dputfield:
-NOT_LP64( __ fstp_d(field); )
+__ store_double(field);
-LP64_ONLY( __ movdbl(field, xmm0);)
 break;
 default:
 ShouldNotReachHere();
 }
 break;
 case Bytecodes::_fast_cgetfield:
 __ load_unsigned_short(rax, field);
 break;
 case Bytecodes::_fast_fgetfield:
-LP64_ONLY(__ movflt(xmm0, field));
+__ load_float(field);
-NOT_LP64(__ fld_s(field));
 break;
 case Bytecodes::_fast_dgetfield:
-LP64_ONLY(__ movdbl(xmm0, field));
+__ load_double(field);
-NOT_LP64(__ fld_d(field));
 break;
 default:
 ShouldNotReachHere();
 }
 // [jk] not needed currently
 case atos:
 __ load_heap_oop(rax, field);
 __ verify_oop(rax);
 break;
 case ftos:
-LP64_ONLY(__ movflt(xmm0, field));
+__ load_float(field);
-NOT_LP64(__ fld_s(field));
 break;
 default:
 ShouldNotReachHere();
 }

changeset 32391	01e2f5e916c7
parent 30132	1f788eb36811
child 32400	ed1a43020a93