diff -r 4ebc2e2fb97c -r 71c04702a3d5 src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp Tue Sep 12 19:03:39 2017 +0200 @@ -0,0 +1,470 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "runtime/arguments.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ _masm-> + + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + // rbx,: method + // rcx: temporary + // rdi: pointer to locals + // rsp: end of copied parameters area + __ mov(rcx, rsp); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), rbx, rdi, rcx); + __ ret(0); + return entry; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx: Method* + // rsi: senderSP must preserved for slow path, set SP to it on fast path + // rdx: scratch + // rdi: scratch + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = rax; // crc + const Register val = rdx; // source java byte value + const Register tbl = rdi; // scratch + + // Arguments are reversed on java expression stack + __ movl(val, Address(rsp, wordSize)); // byte value + __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC + + __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); + __ notl(crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ notl(crc); // ~crc + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx,: Method* + // rsi: senderSP must preserved for slow path, set SP to it on fast path + // rdx: scratch + // rdi: scratch + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = rax; // crc + const Register buf = rdx; // source java byte array address + const Register len = rdi; // length + + // value x86_32 + // interp. arg ptr ESP + 4 + // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + // 3 2 1 0 + // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + // 4 2,3 1 0 + + // Arguments are reversed on java expression stack + __ movl(len, Address(rsp, 4 + 0)); // Length + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC + } + + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** +* Method entry for static native methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) +*/ +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + // Load parameters + const Register crc = rax; // crc + const Register buf = rcx; // source java byte array address + const Register len = rdx; // length + const Register end = len; + + // value x86_32 + // interp. arg ptr ESP + 4 + // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end) + // 3 2 1 0 + // int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end) + // 4 2,3 1 0 + + // Arguments are reversed on java expression stack + __ movl(end, Address(rsp, 4 + 0)); // end + __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC + } + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); + // result in rax + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry; + } + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Float.intBitsToFloat(int bits) + */ +address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { + if (UseSSE >= 1) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movflt(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Float.floatToRawIntBits(float value) + */ +address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { + if (UseSSE >= 1) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + + +/** + * Method entry for static native method: + * java.lang.Double.longBitsToDouble(long bits) + */ +address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { + if (UseSSE >= 2) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movdbl(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Double.doubleToRawLongBits(double value) + */ +address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { + if (UseSSE >= 2) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rdx, Address(rsp, 2*wordSize)); + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + + // rbx,: Method* + // rcx: scratrch + // rsi: sender sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ ret adr ] <-- rsp + // [ lo(arg) ] + // [ hi(arg) ] + // + if (kind == Interpreter::java_lang_math_fmaD) { + if (!UseFMA) { + return NULL; // Generate a vanilla entry + } + __ movdbl(xmm2, Address(rsp, 5 * wordSize)); + __ movdbl(xmm1, Address(rsp, 3 * wordSize)); + __ movdbl(xmm0, Address(rsp, 1 * wordSize)); + __ fmad(xmm0, xmm1, xmm2, xmm0); + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry_point; + } else if (kind == Interpreter::java_lang_math_fmaF) { + if (!UseFMA) { + return NULL; // Generate a vanilla entry + } + __ movflt(xmm2, Address(rsp, 3 * wordSize)); + __ movflt(xmm1, Address(rsp, 2 * wordSize)); + __ movflt(xmm0, Address(rsp, 1 * wordSize)); + __ fmaf(xmm0, xmm1, xmm2, xmm0); + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry_point; + } + + __ fld_d(Address(rsp, 1*wordSize)); + switch (kind) { + case Interpreter::java_lang_math_sin : + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); + } + __ addptr(rsp, 2 * wordSize); + break; + case Interpreter::java_lang_math_cos : + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); + } + __ addptr(rsp, 2 * wordSize); + break; + case Interpreter::java_lang_math_tan : + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dtan() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); + } + __ addptr(rsp, 2 * wordSize); + break; + case Interpreter::java_lang_math_sqrt: + __ fsqrt(); + break; + case Interpreter::java_lang_math_abs: + __ fabs(); + break; + case Interpreter::java_lang_math_log: + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dlog() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); + } + __ addptr(rsp, 2 * wordSize); + break; + case Interpreter::java_lang_math_log10: + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dlog10() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); + } + __ addptr(rsp, 2 * wordSize); + break; + case Interpreter::java_lang_math_pow: + __ fld_d(Address(rsp, 3*wordSize)); // second argument + __ subptr(rsp, 4 * wordSize); + __ fstp_d(Address(rsp, 0)); + __ fstp_d(Address(rsp, 2 * wordSize)); + if (StubRoutines::dpow() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); + } + __ addptr(rsp, 4 * wordSize); + break; + case Interpreter::java_lang_math_exp: + __ subptr(rsp, 2*wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dexp() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); + } + __ addptr(rsp, 2*wordSize); + break; + default : + ShouldNotReachHere(); + } + + // return double result in xmm0 for interpreter and compilers. + if (UseSSE >= 2) { + __ subptr(rsp, 2*wordSize); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addptr(rsp, 2*wordSize); + } + + // done, result in FPU ST(0) or XMM0 + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry_point; +}