8021977: Opening a file using java.io can throw IOException on Windows
Summary: Remove IOException related error-handling code for backward compatibility
Reviewed-by: alanb, lancea, mr
//
// Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// (The definitions below pass a fifth argument as well: the VMReg that
//  corresponds to the register, or VMRegImpl::Bad() for FPR0.)
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers.
Notice FPR0 is not a choice.// FPR0 is not ever allocated; we use clever encodings to fake// a 2-address instructions out of Intels FP stack.reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );reg_class fp_flt_reg0( FPR1L );reg_class fp_dbl_reg0( FPR1L,FPR1H );reg_class fp_dbl_reg1( FPR2L,FPR2H );reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );%}//----------SOURCE BLOCK-------------------------------------------------------// This is a block of C++ code which provides values, functions, and// definitions necessary in the rest of the architecture descriptionsource_hpp %{// Must be visible to the DFA in dfa_x86_32.cppextern bool is_operand_hi32_zero(Node* n);%}source %{#define RELOC_IMM32 Assembler::imm_operand#define RELOC_DISP32 Assembler::disp32_operand#define __ _masm.// How to find the high register of a Long pair, given the low register#define HIGH_FROM_LOW(x) ((x)+2)// These masks are used to provide 128-bit aligned bitmasks to the XMM// instructions, to allow sign-masking or sign-bit flipping. They allow// fast versions of NegF/NegD and AbsF/AbsD.// Note: 'double' and 'long long' have 32-bits alignment on x86.static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { // Use the expression (adr)&(~0xF) to provide 128-bits aligned address // of 128-bits operands for SSE instructions. jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); // Store the value to a 128-bits operand. 
operand[0] = lo; operand[1] = hi; return operand;}// Buffer for 128-bits masks used by SSE instructions.static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)// Static initialization during VM startup.static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));// Offset hacking within calls.static int pre_call_resets_size() { int size = 0; Compile* C = Compile::current(); if (C->in_24_bit_fp_mode()) { size += 6; // fldcw } if (C->max_vector_size() > 16) { size += 3; // vzeroupper } return size;}static int preserve_SP_size() { return 2; // op, rm(reg/reg)}// !!!!! 
// Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
//
// Static Java call: 5 bytes for the call itself plus any pre-call reset
// instructions; a method-handle invoke additionally accounts for the
// SP-preserving move.
int MachCallStaticJavaNode::ret_addr_offset() {
  int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
  if (_method_handle_invoke)
    offset += preserve_SP_size();
  return offset;
}

// Dynamic Java call: 10 bytes (see CallDynamicJavaDirectNode::compute_padding,
// which skips a 5-byte MOV ahead of the call) plus any pre-call resets.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; -1 until it has been recorded
// (the assignment is not in this part of the file).
static int sizeof_FFree_Float_Stack_All = -1;

// Runtime call: the recorded float-stack-free sequence, then the 5-byte call,
// plus any pre-call resets. Asserts that the sequence size has been recorded.
int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Returns the number of pad bytes needed so that the byte after the call
// opcode lands on an alignment_required() boundary.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Same as the direct static case, but also skips the SP-preserving move.
int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += preserve_SP_size(); // skip mov rbp, rsp
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Same as the direct static case, but also skips the 5-byte MOV that
// precedes the call.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Packs f1 into bits 7..6, f2 into bits 5..3 and f3 into bits 2..0 of one
// byte and appends it to the instruction stream. The fields are not masked,
// so callers must pass in-range values. Used below for both R/M and SIB bytes.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Appends the single byte (f1 | f2) to the instruction stream.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
// Appends the low 8 bits of 'code' to the instruction stream.
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// Records a relocation of type 'reloc' at insts_mark() + offset, then emits
// the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
// Appends the low 8 bits of 'd8' to the instruction stream.
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Debug-only sanity check on embedded oops: a non-null, non-placeholder
  // value must be a real oop that is allowed to live in code.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits 'opcode' followed by an [ESP + disp] operand: an R/M byte carrying
// rm_field in its middle field, a SIB byte, and an 8-bit displacement when
// disp fits in a signed byte, otherwise a 32-bit displacement.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

  // rRegI ereg, memory mem) %{ // emit_reg_mem
// Emits the R/M byte (plus SIB byte and displacement where required) for a
// register/memory operand pair. The opcode itself is emitted by the caller.
//   reg_encoding - value placed in the middle (reg) field of the R/M byte
//   base         - base register encoding; -1 is a special flag selecting an
//                  absolute 32-bit address (tested only on the 32-bit
//                  displacement path of the no-SIB form)
//   index, scale - index == 0x4 with scale == 0 means "no index"
//   displace     - displacement; emitted as 8 bits only when it fits in a
//                  signed byte AND needs no relocation
//   disp_reloc   - relocation type for the displacement, or relocInfo::none
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        // NOTE(review): both arms below emit the same two bytes (base is
        // 0x04 in the first arm by construction).
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emits a register-to-register MOV (opcode 0x8B) from src_encoding to
// dst_encoding; emits nothing when both encodings are equal.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Flag fix-up after a floating-point compare: when the parity flag is set,
// rewrite the saved flags on the stack so that only CF (of ZF/PF/CF)
// remains; when parity is clear the fix-up is skipped.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materializes a three-way compare result in dst from the current flags:
// dst stays -1 when parity or below is set; otherwise dst becomes 0 or 1
// according to the notEqual condition.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Prints a textual description of the method prolog (non-PRODUCT builds
// only). 'framesize' is decremented as each slot is accounted for so that
// the printed offsets stay relative to the frame described so far.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(framesize)) {
    framesize -= wordSize;
    st->print("# stack bang");
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emits the method prolog through MacroAssembler::verified_entry, records
// the code offset at which the frame is complete, and fixes the constant
// table base offset when the compilation has a MachConstantBaseNode.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_slots() << LogBytesPerInt;

  __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

// NOTE(review): the inherited comment says "a large enough number" while the
// value returned is 0 - confirm which is intended.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Prints a textual description of the method epilog (non-PRODUCT builds only).
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emits the method epilog: optional vzeroupper, optional restore of the
// standard FPU control word, frame pop (ADD ESP with an 8- or 32-bit
// immediate), POP EBP, and - for method compilations that poll - a TEST
// against the polling page tagged with a poll_return relocation.
// The byte counts here must stay in step with MachEpilogNode::size().
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (do_polling() && C->is_method_compilation()) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the sequence produced by MachEpilogNode::emit(), computed
// from the same conditions.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

// NOTE(review): as with MachPrologNode::reloc(), the comment and the
// returned value (0) disagree - confirm.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes distinguished by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classifies an allocator register name: invalid -> rc_bad, stack slot ->
// rc_stack, general register -> rc_int, x87 register -> rc_float (asserted
// to occur only when UseSSE < 2), anything else must be XMM -> rc_xmm.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emits, prints, or merely sizes one "opcode reg,[ESP+offset]" instruction.
//   cbuf != NULL        -> emit into the code buffer
//   cbuf == NULL and
//     !do_size          -> print to 'st' (non-PRODUCT builds only)
//   otherwise           -> size only
// Returns 'size' plus the instruction length: 3 bytes (opcode, R/M, SIB)
// plus 0, 1 or 4 displacement bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Moves a float, or a double when reg_lo+1 == reg_hi, between an XMM
// register and [ESP+offset]; same emit/print/size convention as impl_helper.
// Returns 'size' plus 5 bytes plus the displacement size.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM move of a float, or of a double when both the source and the
// destination are adjacent lo/hi pairs. Returns 'size' plus 4 bytes, or
// plus 3 for the single-precision UseXmmRegToRegMoveAll form without AVX.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.
  int sz = 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit general-register -> XMM move (movdl).
// Note: returns a fixed 4 and does not add the incoming 'size'.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}


// 32-bit XMM -> general-register move (movdl).
// Note: returns a fixed 4 and does not add the incoming 'size'.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}

// General-register to general-register MOV (opcode 0x8B); returns 'size'
// plus the 2 bytes of the instruction.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Stores an x87 register to [ESP+offset]. A source other than FPR1 is first
// pushed onto the FP stack with FLD and the store then pops it again; a
// value already in FPR1 is stored without popping.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );#ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("FLD %s",Matcher::regName[src_lo]);#endif } size += 2; } int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; const char *op_str; int op; if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; op = 0xDD; } else { // 32-bit store op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; op = 0xD9; assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); } return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);}// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st);static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st);static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, int dst_offset, uint ireg, outputStream* st) { int calc_size = 0; int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); switch (ireg) { case Op_VecS: calc_size = 3+src_offset_size + 3+dst_offset_size; break; case Op_VecD: calc_size = 3+src_offset_size + 3+dst_offset_size; src_offset += 4; dst_offset += 4; src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 
1 : 4); calc_size += 3+src_offset_size + 3+dst_offset_size; break; case Op_VecX: calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; break; case Op_VecY: calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; break; default: ShouldNotReachHere(); } if (cbuf) { MacroAssembler _masm(cbuf); int offset = __ offset(); switch (ireg) { case Op_VecS: __ pushl(Address(rsp, src_offset)); __ popl (Address(rsp, dst_offset)); break; case Op_VecD: __ pushl(Address(rsp, src_offset)); __ popl (Address(rsp, dst_offset)); __ pushl(Address(rsp, src_offset+4)); __ popl (Address(rsp, dst_offset+4)); break; case Op_VecX: __ movdqu(Address(rsp, -16), xmm0); __ movdqu(xmm0, Address(rsp, src_offset)); __ movdqu(Address(rsp, dst_offset), xmm0); __ movdqu(xmm0, Address(rsp, -16)); break; case Op_VecY: __ vmovdqu(Address(rsp, -32), xmm0); __ vmovdqu(xmm0, Address(rsp, src_offset)); __ vmovdqu(Address(rsp, dst_offset), xmm0); __ vmovdqu(xmm0, Address(rsp, -32)); break; default: ShouldNotReachHere(); } int size = __ offset() - offset; assert(size == calc_size, "incorrect size calculattion"); return size;#ifndef PRODUCT } else if (!do_size) { switch (ireg) { case Op_VecS: st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" "popl [rsp + #%d]", src_offset, dst_offset); break; case Op_VecD: st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" "popq [rsp + #%d]\n\t" "pushl [rsp + #%d]\n\t" "popq [rsp + #%d]", src_offset, dst_offset, src_offset+4, dst_offset+4); break; case Op_VecX: st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" "movdqu xmm0, [rsp + #%d]\n\t" "movdqu [rsp + #%d], xmm0\n\t" "movdqu xmm0, [rsp - #16]", src_offset, dst_offset); break; case Op_VecY: st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" "vmovdqu xmm0, [rsp + #%d]\n\t" "vmovdqu [rsp + #%d], xmm0\n\t" "vmovdqu xmm0, [rsp - #32]", src_offset, dst_offset); break; default: ShouldNotReachHere(); }#endif } return calc_size;}uint MachSpillCopyNode::implementation( CodeBuffer 
*cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); OptoReg::Name src_first = ra_->get_reg_first(in(1)); OptoReg::Name dst_second = ra_->get_reg_second(this ); OptoReg::Name dst_first = ra_->get_reg_first(this ); enum RC src_second_rc = rc_class(src_second); enum RC src_first_rc = rc_class(src_first); enum RC dst_second_rc = rc_class(dst_second); enum RC dst_first_rc = rc_class(dst_first); assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); // Generate spill code! int size = 0; if( src_first == dst_first && src_second == dst_second ) return size; // Self copy, no move if (bottom_type()->isa_vect() != NULL) { uint ireg = ideal_reg(); assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { // mem -> mem int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { int stack_offset = ra_->reg2offset(dst_first); return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { int stack_offset = ra_->reg2offset(src_first); return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); } else { ShouldNotReachHere(); } } // -------------------------------------- // Check for mem-mem move. push/pop to move. 
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { if( src_second == dst_first ) { // overlapping stack copy ranges assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits } // move low bits size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); } return size; } // -------------------------------------- // Check for integer reg-reg copy if( src_first_rc == rc_int && dst_first_rc == rc_int ) size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); // Check for integer store if( src_first_rc == rc_int && dst_first_rc == rc_stack ) size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); // Check for integer load if( dst_first_rc == rc_int && src_first_rc == rc_stack ) size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); // Check for integer reg-xmm reg copy if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), "no 64 bit integer-float reg moves" ); return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // -------------------------------------- // Check for float reg-reg copy if( src_first_rc == rc_float && dst_first_rc == rc_float ) 
{ assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); if( cbuf ) { // Note the mucking with the register encode to compensate for the 0/1 // indexing issue mentioned in a comment in the reg_def sections // for FPR registers many lines above here. if( src_first != FPR1L_num ) { emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); } else { emit_opcode (*cbuf, 0xDD ); // FST ST(i) emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); }#ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); else st->print( "FST %s", Matcher::regName[dst_first]);#endif } return size + ((src_first != FPR1L_num) ? 2+2 : 2); } // Check for float store if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); } // Check for float load if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { int offset = ra_->reg2offset(src_first); const char *op_str; int op; if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
op_str = "FLD_D"; op = 0xDD; } else { // 32-bit load op_str = "FLD_S"; op = 0xD9; assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); } if( cbuf ) { emit_opcode (*cbuf, op ); encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );#ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);#endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); return size + 3+offset_size+2; } // Check for xmm reg-reg copy if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // Check for xmm reg-integer reg copy if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), "no 64 bit float-integer reg moves" ); return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // Check for xmm store if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); } // Check for float xmm load if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); } // Copy from float reg to xmm reg if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { // copy to the top of stack from floating point reg // and use LEA to preserve flags if( cbuf ) { emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] emit_rm(*cbuf, 0x1, ESP_enc, 0x04); emit_rm(*cbuf, 0x0, 0x04, ESP_enc); emit_d8(*cbuf,0xF8);#ifndef PRODUCT } 
else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("LEA ESP,[ESP-8]");#endif } size += 4; size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); // Copy from the temp memory to the xmm reg. size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); if( cbuf ) { emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] emit_rm(*cbuf, 0x1, ESP_enc, 0x04); emit_rm(*cbuf, 0x0, 0x04, ESP_enc); emit_d8(*cbuf,0x08);#ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("LEA ESP,[ESP+8]");#endif } size += 4; return size; } assert( size > 0, "missed a case" ); // -------------------------------------------------------------------- // Check for second bits still needing moving. if( src_second == dst_second ) return size; // Self copy; no move assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); // Check for second word int-int move if( src_second_rc == rc_int && dst_second_rc == rc_int ) return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); // Check for second word integer store if( src_second_rc == rc_int && dst_second_rc == rc_stack ) return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); // Check for second word integer load if( dst_second_rc == rc_int && src_second_rc == rc_stack ) return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); Unimplemented();}#ifndef PRODUCTvoid MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { implementation( NULL, ra_, false, st );}#endifvoid MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { implementation( &cbuf, ra_, false, NULL );}uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { return implementation( NULL, ra_, true, NULL );}//=============================================================================#ifndef PRODUCTvoid BoxLockNode::format( 
PhaseRegAlloc *ra_, outputStream* st ) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); int reg = ra_->get_reg_first(this); st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);}#endifvoid BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); int reg = ra_->get_encode(this); if( offset >= 128 ) { emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] emit_rm(cbuf, 0x2, reg, 0x04); emit_rm(cbuf, 0x0, 0x04, ESP_enc); emit_d32(cbuf, offset); } else { emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] emit_rm(cbuf, 0x1, reg, 0x04); emit_rm(cbuf, 0x0, 0x04, ESP_enc); emit_d8(cbuf, offset); }}uint BoxLockNode::size(PhaseRegAlloc *ra_) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); if( offset >= 128 ) { return 7; } else { return 4; }}//=============================================================================#ifndef PRODUCTvoid MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); st->print_cr("\tNOP"); st->print_cr("\tNOP"); if( !OptoBreakpoint ) st->print_cr("\tNOP");}#endifvoid MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler masm(&cbuf);#ifdef ASSERT uint insts_size = cbuf.insts_size();#endif masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub())); /* WARNING these NOPs are critical so that verified entry point is properly aligned for patching by NativeJump::patch_verified_entry() */ int nops_cnt = 2; if( !OptoBreakpoint ) // Leave space for int3 nops_cnt += 1; masm.nop(nops_cnt); assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");}uint MachUEPNode::size(PhaseRegAlloc *ra_) const { return OptoBreakpoint ? 
11 : 12;}//=============================================================================uint size_exception_handler() { // NativeCall instruction size is the same as NativeJump. // exception handler starts out as jump and can be patched to // a call be deoptimization. (4932387) // Note that this value is also credited (in output.cpp) to // the size of the code section. return NativeJump::instruction_size;}// Emit exception handler code. Stuff framesize into a register// and call a VM stub routine.int emit_exception_handler(CodeBuffer& cbuf) { // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a handler. MacroAssembler _masm(&cbuf); address base = __ start_a_stub(size_exception_handler()); if (base == NULL) return 0; // CodeBuffer::expand failed int offset = __ offset(); __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); __ end_a_stub(); return offset;}uint size_deopt_handler() { // NativeCall instruction size is the same as NativeJump. // exception handler starts out as jump and can be patched to // a call be deoptimization. (4932387) // Note that this value is also credited (in output.cpp) to // the size of the code section. return 5 + NativeJump::instruction_size; // pushl(); jmp;}// Emit deopt handler code.int emit_deopt_handler(CodeBuffer& cbuf) { // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a handler. 
MacroAssembler _masm(&cbuf); address base = __ start_a_stub(size_exception_handler()); if (base == NULL) return 0; // CodeBuffer::expand failed int offset = __ offset(); InternalAddress here(__ pc()); __ pushptr(here.addr()); __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); __ end_a_stub(); return offset;}int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk}// This is UltraSparc specific, true just means we have fast l2f conversionconst bool Matcher::convL2FSupported(void) { return true;}// Is this branch offset short enough that a short branch can be used?//// NOTE: If the platform does not provide any short branch variants, then// this method should return false for offset 0.bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { // The passed offset is relative to address of the branch. // On 86 a branch displacement is calculated relative to address // of a next instruction. offset -= br_size; // the short version of jmpConUCF2 contains multiple branches, // making the reach slightly less if (rule == jmpConUCF2_rule) return (-126 <= offset && offset <= 125); return (-128 <= offset && offset <= 127);}const bool Matcher::isSimpleConstant64(jlong value) { // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. return false;}// The ecx parameter to rep stos for the ClearArray node is in dwords.const bool Matcher::init_array_count_is_in_bytes = false;// Threshold size for cleararray.const int Matcher::init_array_short_size = 8 * BytesPerLong;// Needs 2 CMOV's for longs.const int Matcher::long_cmove_cost() { return 1; }// No CMOVF/CMOVD with SSE/SSE2const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }// Should the Matcher clone shifts on addressing modes, expecting them to// be subsumed into complex addressing expressions or compute them into// registers? 
True for Intel but false for most RISCsconst bool Matcher::clone_shift_expressions = true;// Do we need to mask the count passed to shift instructions or does// the cpu only look at the lower 5/6 bits anyway?const bool Matcher::need_masked_shift_count = false;bool Matcher::narrow_oop_use_complex_address() { ShouldNotCallThis(); return true;}bool Matcher::narrow_klass_use_complex_address() { ShouldNotCallThis(); return true;}// Is it better to copy float constants, or load them directly from memory?// Intel can load a float constant from a direct address, requiring no// extra registers. Most RISCs will have to materialize an address into a// register first, so they would do better to copy the constant from stack.const bool Matcher::rematerialize_float_constants = true;// If CPU can load and store mis-aligned doubles directly then no fixup is// needed. Else we split the double into 2 integer pieces and move it// piece-by-piece. Only happens when passing doubles into C code as the// Java calling convention forces doubles to be aligned.const bool Matcher::misaligned_doubles_ok = true;void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { // Get the memory operand from the node uint numopnds = node->num_opnds(); // Virtual call for number of operands uint skipped = node->oper_input_base(); // Sum of leaves skipped so far assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); uint opcnt = 1; // First operand uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand while( idx >= skipped+num_edges ) { skipped += num_edges; opcnt++; // Bump operand count assert( opcnt < numopnds, "Accessing non-existent operand" ); num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand } MachOper *memory = node->_opnds[opcnt]; MachOper *new_memory = NULL; switch (memory->opcode()) { case DIRECT: case INDOFFSET32X: // No transformation necessary. 
return; case INDIRECT: new_memory = new (C) indirect_win95_safeOper( ); break; case INDOFFSET8: new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDOFFSET32: new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDINDEXOFFSET: new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDINDEXSCALE: new_memory = new (C) indIndexScale_win95_safeOper(memory->scale()); break; case INDINDEXSCALEOFFSET: new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); break; case LOAD_LONG_INDIRECT: case LOAD_LONG_INDOFFSET32: // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} return; default: assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); return; } node->_opnds[opcnt] = new_memory;}// Advertise here if the CPU requires explicit rounding operations// to implement the UseStrictFP mode.const bool Matcher::strict_fp_requires_explicit_rounding = true;// Are floats conerted to double when stored to stack during deoptimization?// On x32 it is stored with convertion only when FPU is used for floats.bool Matcher::float_in_double() { return (UseSSE == 0); }// Do ints take an entire long register or just half?const bool Matcher::int_in_long = false;// Return whether or not this register is ever used as an argument. 
This// function is used on startup to build the trampoline stubs in generateOptoStub.// Registers not mentioned will be killed by the VM call in the trampoline, and// arguments in those registers not be available to the callee.bool Matcher::can_be_java_arg( int reg ) { if( reg == ECX_num || reg == EDX_num ) return true; if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; return false;}bool Matcher::is_spillable_arg( int reg ) { return can_be_java_arg(reg);}bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { // Use hardware integer DIV instruction when // it is faster than a code which use multiply. // Only when constant divisor fits into 32 bit // (min_jint is excluded to get only correct // positive 32 bit values from negative). return VM_Version::has_fast_idiv() && (divisor == (int)divisor && divisor != min_jint);}// Register for DIVI projection of divmodIRegMask Matcher::divI_proj_mask() { return EAX_REG_mask();}// Register for MODI projection of divmodIRegMask Matcher::modI_proj_mask() { return EDX_REG_mask();}// Register for DIVL projection of divmodLRegMask Matcher::divL_proj_mask() { ShouldNotReachHere(); return RegMask();}// Register for MODL projection of divmodLRegMask Matcher::modL_proj_mask() { ShouldNotReachHere(); return RegMask();}const RegMask Matcher::method_handle_invoke_SP_save_mask() { return EBP_REG_mask();}// Returns true if the high 32 bits of the value is known to be zero.bool is_operand_hi32_zero(Node* n) { int opc = n->Opcode(); if (opc == Op_AndL) { Node* o2 = n->in(2); if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { return true; } } if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { return true; } return false;}%}//----------ENCODING BLOCK-----------------------------------------------------// This block specifies the encoding classes used by the compiler to output// byte streams. 
Encoding classes generate functions which are called by// Machine Instruction Nodes in order to generate the bit encoding of the// instruction. Operands specify their base encoding interface with the// interface keyword. There are currently supported four interfaces,// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an// operand to generate a function which returns its register number when// queried. CONST_INTER causes an operand to generate a function which// returns the value of the constant when queried. MEMORY_INTER causes an// operand to generate four functions which return the Base Register, the// Index Register, the Scale Value, and the Offset Value of the operand when// queried. COND_INTER causes an operand to generate six functions which// return the encoding code (ie - encoding bits for the instruction)// associated with each basic boolean condition for a conditional instruction.// Instructions specify two basic values for encoding. They use the// ins_encode keyword to specify their encoding class (which must be one of// the class names specified in the encoding block), and they use the// opcode keyword to specify, in order, their primary, secondary, and// tertiary opcode. Only the opcode sections which a particular instruction// needs for encoding need to be specified.encode %{ // Build emit functions for each basic byte or larger field in the intel // encoding scheme (opcode, rm, sib, immediate), and call them from C++ // code in the enc_class source block. Emit functions will live in the // main source block for now. 
In future, we can generalize this by // adding a syntax that specifies the sizes of fields in an order, // so that the adlc can build the emit functions automagically // Emit primary opcode enc_class OpcP %{ emit_opcode(cbuf, $primary); %} // Emit secondary opcode enc_class OpcS %{ emit_opcode(cbuf, $secondary); %} // Emit opcode directly enc_class Opcode(immI d8) %{ emit_opcode(cbuf, $d8$$constant); %} enc_class SizePrefix %{ emit_opcode(cbuf,0x66); %} enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) emit_opcode(cbuf,$opcode$$constant); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class mov_r32_imm0( rRegI dst ) %{ emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 %} enc_class cdq_enc %{ // Full implementation of Java idiv and irem; checks for // special case as described in JVM spec., p.243 & p.271. 
// // normal case special case // // input : rax,: dividend min_int // reg: divisor -1 // // output: rax,: quotient (= rax, idiv reg) min_int // rdx: remainder (= rax, irem reg) 0 // // Code sequnce: // // 81 F8 00 00 00 80 cmp rax,80000000h // 0F 85 0B 00 00 00 jne normal_case // 33 D2 xor rdx,edx // 83 F9 FF cmp rcx,0FFh // 0F 84 03 00 00 00 je done // normal_case: // 99 cdq // F7 F9 idiv rax,ecx // done: // emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done // normal_case: emit_opcode(cbuf,0x99); // cdq // idiv (note: must be emitted by the user of this rule) // normal: %} // Dense encoding for older common ops enc_class Opc_plus(immI opcode, rRegI reg) %{ emit_opcode(cbuf, $opcode$$constant + $reg$$reg); %} // Opcde enc_class for 8/32 bit immediate instructions with sign-extension enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { emit_opcode(cbuf, $primary | 0x02); } else { // If 32-bit immediate emit_opcode(cbuf, $primary); } %} enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { emit_opcode(cbuf, $primary | 0x02); } else { // If 32-bit immediate emit_opcode(cbuf, $primary); } // Emit r/m byte with secondary opcode, 
after primary opcode. emit_rm(cbuf, 0x3, $secondary, $dst$$reg); %} enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { $$$emit8$imm$$constant; } else { // If 32-bit immediate // Output immediate $$$emit32$imm$$constant; } %} enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode int con = (int)$imm$$constant; // Throw away top bits emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); // Emit r/m byte with secondary opcode, after primary opcode. emit_rm(cbuf, 0x3, $secondary, $dst$$reg); if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); else emit_d32(cbuf,con); %} enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode int con = (int)($imm$$constant >> 32); // Throw away bottom bits emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); // Emit r/m byte with tertiary opcode, after primary opcode. emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); else emit_d32(cbuf,con); %} enc_class OpcSReg (rRegI dst) %{ // BSWAP emit_cc(cbuf, $secondary, $dst$$reg ); %} enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP int destlo = $dst$$reg; int desthi = HIGH_FROM_LOW(destlo); // bswap lo emit_opcode(cbuf, 0x0F); emit_cc(cbuf, 0xC8, destlo); // bswap hi emit_opcode(cbuf, 0x0F); emit_cc(cbuf, 0xC8, desthi); // xchg lo and hi emit_opcode(cbuf, 0x87); emit_rm(cbuf, 0x3, destlo, desthi); %} enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 
emit_rm(cbuf, 0x3, $secondary, $div$$reg ); %} enc_class enc_cmov(cmpOp cop ) %{ // CMOV $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); %} enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); emit_d8(cbuf, op >> 8 ); emit_d8(cbuf, op & 255); %} // emulate a CMOV with a conditional branch around a MOV enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV // Invert sense of branch from sense of CMOV emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); emit_d8( cbuf, $brOffs$$constant ); %} enc_class enc_PartialSubtypeCheck( ) %{ Register Redi = as_Register(EDI_enc); // result register Register Reax = as_Register(EAX_enc); // super class Register Recx = as_Register(ECX_enc); // killed Register Resi = as_Register(ESI_enc); // sub class Label miss; MacroAssembler _masm(&cbuf); __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, NULL, &miss, /*set_cond_codes:*/ true); if ($primary) { __ xorptr(Redi, Redi); } __ bind(miss); %} enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All MacroAssembler masm(&cbuf); int start = masm.offset(); if (UseSSE >= 2) { if (VerifyFPU) { masm.verify_FPU(0, "must be empty in SSE2+ mode"); } } else { // External c_calling_convention expects the FPU stack to be 'clean'. // Compiled code leaves it dirty. Do cleanup now. masm.empty_FPU_stack(); } if (sizeof_FFree_Float_Stack_All == -1) { sizeof_FFree_Float_Stack_All = masm.offset() - start; } else { assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); } %} enc_class Verify_FPU_For_Leaf %{ if( VerifyFPU ) { MacroAssembler masm(&cbuf); masm.verify_FPU( -3, "Returning from Runtime Leaf call"); } %} enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf // This is the instruction starting address for relocation info. 
cbuf.set_insts_mark(); $$$emit8$primary; // CALL directly to the runtime emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), runtime_call_Relocation::spec(), RELOC_IMM32 ); if (UseSSE >= 2) { MacroAssembler _masm(&cbuf); BasicType rt = tf()->return_type(); if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { // A C runtime call where the return value is unused. In SSE2+ // mode the result needs to be removed from the FPU stack. It's // likely that this function call could be removed by the // optimizer if the C function is a pure function. __ ffree(0); } else if (rt == T_FLOAT) { __ lea(rsp, Address(rsp, -4)); __ fstp_s(Address(rsp, 0)); __ movflt(xmm0, Address(rsp, 0)); __ lea(rsp, Address(rsp, 4)); } else if (rt == T_DOUBLE) { __ lea(rsp, Address(rsp, -8)); __ fstp_d(Address(rsp, 0)); __ movdbl(xmm0, Address(rsp, 0)); __ lea(rsp, Address(rsp, 8)); } } %} enc_class pre_call_resets %{ // If method sets FPU control word restore it here debug_only(int off0 = cbuf.insts_size()); if (ra_->C->in_24_bit_fp_mode()) { MacroAssembler _masm(&cbuf); __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } if (ra_->C->max_vector_size() > 16) { // Clear upper bits of YMM registers when current compiled code uses // wide vectors to avoid AVX <-> SSE transition penalty during call. MacroAssembler _masm(&cbuf); __ vzeroupper(); } debug_only(int off1 = cbuf.insts_size()); assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); %} enc_class post_call_FPU %{ // If method sets FPU control word do it here also if (Compile::current()->in_24_bit_fp_mode()) { MacroAssembler masm(&cbuf); masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); } %} enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine // who we intended to call. 
cbuf.set_insts_mark(); $$$emit8$primary; if (!_method) { emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), runtime_call_Relocation::spec(), RELOC_IMM32 ); } else if (_optimized_virtual) { emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); } else { emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), static_call_Relocation::spec(), RELOC_IMM32 ); } if (_method) { // Emit stub for static call. CompiledStaticCall::emit_to_interp_stub(cbuf); } %} enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL MacroAssembler _masm(&cbuf); __ ic_call((address)$meth$$method); %} enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL int disp = in_bytes(Method::from_compiled_offset()); assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] cbuf.set_insts_mark(); $$$emit8$primary; emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte emit_d8(cbuf, disp); // Displacement %}// Following encoding is no longer used, but may be restored if calling// convention changes significantly.// Became: Xor_Reg(EBP), Java_To_Runtime( labl )//// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL// // int ic_reg = Matcher::inline_cache_reg();// // int ic_encode = Matcher::_regEncode[ic_reg];// // int imo_reg = Matcher::interpreter_method_oop_reg();// // int imo_encode = Matcher::_regEncode[imo_reg];//// // // Interpreter expects method_oop in EBX, currently a callee-saved register,// // // so we load it immediately before the call// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte//// // xor rbp,ebp// emit_opcode(cbuf, 0x33);// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);//// // CALL to interpreter.// cbuf.set_insts_mark();// $$$emit8$primary;// emit_d32_reloc(cbuf, ($labl$$label - 
(int)(cbuf.insts_end()) - 4),// runtime_call_Relocation::spec(), RELOC_IMM32 );// %} enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR $$$emit8$primary; emit_rm(cbuf, 0x3, $secondary, $dst$$reg); $$$emit8$shift$$constant; %} enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, 0xB8 + $dst$$reg); $$$emit32$src$$constant; %} enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, $primary + $dst$$reg); $$$emit32$src$$constant; %} enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates int dst_enc = $dst$$reg; int src_con = $src$$constant & 0x0FFFFFFFFL; if (src_con == 0) { // xor dst, dst emit_opcode(cbuf, 0x33); emit_rm(cbuf, 0x3, dst_enc, dst_enc); } else { emit_opcode(cbuf, $primary + dst_enc); emit_d32(cbuf, src_con); } %} enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates int dst_enc = $dst$$reg + 2; int src_con = ((julong)($src$$constant)) >> 32; if (src_con == 0) { // xor dst, dst emit_opcode(cbuf, 0x33); emit_rm(cbuf, 0x3, dst_enc, dst_enc); } else { emit_opcode(cbuf, $primary + dst_enc); emit_d32(cbuf, src_con); } %} // Encode a reg-reg copy. If it is useless, then empty encoding. 
enc_class enc_Copy( rRegI dst, rRegI src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) $$$emit8$primary; emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) $$$emit8$secondary; emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); %} enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); %} enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); %} enc_class Con32 (immI src) %{ // Con32(storeImmI) // Output immediate $$$emit32$src$$constant; %} enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); emit_d32(cbuf, jf_as_bits); %} enc_class Con32F_as_bits(immF src) %{ // storeX_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); emit_d32(cbuf, jf_as_bits); %} enc_class Con16 (immI src) %{ // Con16(storeImmI) // Output immediate $$$emit16$src$$constant; %} enc_class Con_d32(immI src) %{ emit_d32(cbuf,$src$$constant); %} enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) // Output immediate memory reference emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); emit_d32(cbuf, 0x00); %} enc_class lock_prefix( ) %{ if( os::is_MP() ) emit_opcode(cbuf,0xF0); // [Lock] %} // Cmp-xchg long value. 
// Note: we need to swap rbx and rcx before and after the
// cmpxchg8 instruction because the instruction uses
// rcx as the high order word of the new value to store but
// our register encoding uses rbx.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
  // XCHG rbx,ecx
  emit_opcode(cbuf, 0x87);
  emit_opcode(cbuf, 0xD9);
  // [Lock]
  if (os::is_MP()) {
    emit_opcode(cbuf, 0xF0);
  }
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0xC7);
  emit_rm(cbuf, 0x0, 1, $mem_ptr$$reg);
  // XCHG rbx,ecx
  emit_opcode(cbuf, 0x87);
  emit_opcode(cbuf, 0xD9);
%}

// Compare-and-exchange on the word addressed by mem_ptr; the lock prefix is
// emitted only when os::is_MP() reports true.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  if (os::is_MP()) {
    emit_opcode(cbuf, 0xF0);
  }
  // CMPXCHG [Eptr]
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0xB1);
  emit_rm(cbuf, 0x0, 1, $mem_ptr$$reg);
%}

// Turn the flags into a 0/1 value in res: res is loaded with 0, and a short
// JNE skips the 5-byte "MOV res,1" that follows.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int mov_imm_op = 0xB8 + $res$$reg;
  // MOV res,0
  emit_opcode(cbuf, mov_imm_op);
  emit_d32(cbuf, 0);
  // JNE,s fail
  emit_opcode(cbuf, 0x75);
  emit_d8(cbuf, 5);
  // MOV res,1
  emit_opcode(cbuf, mov_imm_op);
  emit_d32(cbuf, 1);
  // fail:
%}

enc_class set_instruction_start( ) %{
  // Mark start of opcode for reloc info in mem operand
  cbuf.set_insts_mark();
%}

// emit_reg_mem: encode a register together with a memory operand, handing the
// operand's own displacement relocation type to encode_RegMem.
enc_class RegMem (rRegI ereg, memory mem) %{
  encode_RegMem(cbuf, $ereg$$reg, $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp, $mem->disp_reloc());
%}

// emit_reg_mem for the high half of a long: uses the hi register of the pair
// (computed from lo) and a displacement 4 further in memory.  The operand must
// carry no relocation, since 4 cannot be added to an oop.
enc_class RegMem_Hi(eRegL ereg, memory mem) %{
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, HIGH_FROM_LOW($ereg$$reg), $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp + 4, relocInfo::none);
%}

// Long shift by a constant in 1..31: a double-precision shift (0x0F, tertiary
// opcode) across the two halves, followed by a plain shift (primary opcode,
// secondary in the ModRM reg field) of the first register.  Which half plays
// which role depends on whether the tertiary opcode is 0xA4.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  const bool lo_first = ($tertiary == 0xA4);
  int r1 = lo_first ? $dst$$reg : HIGH_FROM_LOW($dst$$reg);
  int r2 = lo_first ? HIGH_FROM_LOW($dst$$reg) : $dst$$reg;
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, $tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf, $cnt$$constant);
  emit_d8(cbuf, $primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf, $cnt$$constant);
%}

// Long shift by a constant in 32..63 that fills with the sign: move the high
// half into the low half, shift the low half by cnt-32 when that is non-zero,
// then shift the high half by 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  const int lo = $dst$$reg;
  const int hi = HIGH_FROM_LOW($dst$$reg);
  emit_opcode(cbuf, 0x8B);                 // Move
  emit_rm(cbuf, 0x3, lo, hi);
  if ($cnt$$constant > 32) {               // Shift, if not by zero
    emit_d8(cbuf, $primary);
    emit_rm(cbuf, 0x3, $secondary, lo);
    emit_d8(cbuf, $cnt$$constant-32);
  }
  emit_d8(cbuf, $primary);
  emit_rm(cbuf, 0x3, $secondary, hi);
  emit_d8(cbuf, 31);
%}

// Long shift by a constant in 32..63 that fills with zero: move one half into
// the other (direction chosen by whether the secondary opcode is 0x5), shift
// it by cnt-32 when that is non-zero, then clear the vacated half.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  const bool into_lo = ($secondary == 0x5);
  int r1 = into_lo ? $dst$$reg : HIGH_FROM_LOW($dst$$reg);
  int r2 = into_lo ? HIGH_FROM_LOW($dst$$reg) : $dst$$reg;
  emit_opcode(cbuf, 0x8B);                 // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if ($cnt$$constant > 32) {               // Shift, if not by zero
    emit_opcode(cbuf, $primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf, $cnt$$constant-32);
  }
  emit_opcode(cbuf, 0x33);                 // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf, $opcode$$constant);
  encode_RegMem(cbuf, $rm_reg$$reg, $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp + $disp_for_half$$constant, relocInfo::none);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, $rm_opcode$$constant, $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp, relocInfo::none);
%}

// Like RMopc_Mem_no_oop, but the operand's displacement relocation is passed
// through (disp-as-oop when working with static globals).
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  encode_RegMem(cbuf, $rm_opcode$$constant, $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp, $mem->disp_reloc());
%}

// emit_reg_lea: address formed from a base register plus a constant
// displacement, with no index, no scale and no relocation.
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{
  const int no_index = 0x04;   // 0x04 indicates no index
  const int no_scale = 0x00;   // 0x00 indicates no scale
  encode_RegMem(cbuf, $dst$$reg, $src0$$reg, no_index, no_scale,
                $src1$$constant, relocInfo::none);
%}

// MIN: compare dst with src and skip the 2-byte move when dst < src.
enc_class min_enc (rRegI dst, rRegI src) %{
  // Compare dst,src
  emit_opcode(cbuf, 0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf, 0x7C);
  emit_d8(cbuf, 2);
  // move dst,src
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// MAX: compare dst with src and skip the 2-byte move when dst > src.
enc_class max_enc (rRegI dst, rRegI src) %{
  // Compare dst,src
  emit_opcode(cbuf, 0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf, 0x7F);
  emit_d8(cbuf, 2);
  // move dst,src
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Store an FPU register to memory.
// If src is FPR1, we can just FST to store it.
// Else we need to FLD it to FPR1, then FSTP to store/pop it.
enc_class enc_FPR_store(memory mem, regDPR src) %{
  int store_op = 0x2;                       // Just store
  if ($src$$reg != FPR1L_enc) {
    emit_opcode(cbuf, 0xD9);                // FLD (i.e., push it)
    emit_d8(cbuf, 0xC0-1+$src$$reg);
    store_op = 0x3;                         // Store & pop
  }
  cbuf.set_insts_mark();                    // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf, $primary);
  // disp-as-oop when working with static globals
  encode_RegMem(cbuf, store_op, $mem$$base, $mem$$index, $mem$$scale,
                $mem$$disp, $mem->disp_reloc());
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf, 0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg);
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0x9C);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg);
%}

// cadd_cmpLT: p -= q; tmp becomes a mask from the borrow; p += (tmp & y).
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{
  const int mask = $tmp$$reg;
  // SUB $p,$q
  emit_opcode(cbuf, 0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf, 0x1B);
  emit_rm(cbuf, 0x3, mask, mask);
  // AND $tmp,$y
  emit_opcode(cbuf, 0x23);
  emit_rm(cbuf, 0x3, mask, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf, 0x03);
  emit_rm(cbuf, 0x3, $p$$reg, mask);
%}

// Long shift left by a variable count in ECX.  When bit 5 of the count is set
// the low half is first moved into the high half and cleared; the short JEQ
// skips those 4 bytes otherwise.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  const int lo = $dst$$reg;
  const int hi = HIGH_FROM_LOW($dst$$reg);
  // TEST shift,32
  emit_opcode(cbuf, 0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf, 0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, hi, lo);
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, lo, lo);
  // small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0xA5);
  emit_rm(cbuf, 0x3, lo, hi);
  // SHL $dst.lo,$shift
  emit_opcode(cbuf, 0xD3);
  emit_rm(cbuf, 0x3, 0x4, lo);
%}

// Long logical shift right by a variable count in ECX.  When bit 5 of the
// count is set the high half is first moved into the low half and cleared;
// the short JEQ skips those 4 bytes otherwise.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  const int lo = $dst$$reg;
  const int hi = HIGH_FROM_LOW($dst$$reg);
  // TEST shift,32
  emit_opcode(cbuf, 0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf, 0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, lo, hi);
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, hi, hi);
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0xAD);
  emit_rm(cbuf, 0x3, hi, lo);
  // SHR $dst.hi,$shift
  emit_opcode(cbuf, 0xD3);
  emit_rm(cbuf, 0x3, 0x5, hi);
%}

// Long arithmetic shift right by a variable count in ECX.  When bit 5 of the
// count is set the high half is first moved into the low half and replaced by
// its sign (SAR 31); the short JEQ skips those 5 bytes otherwise.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  const int lo = $dst$$reg;
  const int hi = HIGH_FROM_LOW($dst$$reg);
  // TEST shift,32
  emit_opcode(cbuf, 0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf, 0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, lo, hi);
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, hi);
  emit_d8(cbuf, 0x1F);
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf, 0x0F);
  emit_opcode(cbuf, 0xAD);
  emit_rm(cbuf, 0x3, hi, lo);
  // SAR $dst.hi,$shift
  emit_opcode(cbuf, 0xD3);
  emit_rm(cbuf, 0x3, 0x7, hi);
%}

// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{
  // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg);
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode(cbuf, 0xDD);
  emit_d8(cbuf, 0xD8);
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  // FSTP ST(i)
  emit_opcode(cbuf, 0xDD);
  emit_d8(cbuf, 0xD8+$dst$$reg);
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  // FLD ST(i-1)
  emit_opcode(cbuf, 0xD9);
  emit_d8(cbuf, 0xC0-1+$dst$$reg);
%}

// Load the 80-bit value at StubRoutines::addr_fpu_subnormal_bias1() and
// multiply it into dst, popping the FPU stack.
enc_class strictfp_bias1( regDPR dst ) %{
  // FLD m80real
  emit_opcode(cbuf, 0xDB);
  emit_opcode(cbuf, 0x2D);
  emit_d32(cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1());
  // FMULP ST(dst), ST0
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8+$dst$$reg);
%}

// Same as strictfp_bias1 but with StubRoutines::addr_fpu_subnormal_bias2().
enc_class strictfp_bias2( regDPR dst ) %{
  // FLD m80real
  emit_opcode(cbuf, 0xDB);
  emit_opcode(cbuf, 0x2D);
  emit_d32(cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2());
  // FMULP ST(dst), ST0
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8+$dst$$reg);
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{
  // RegSS
  store_to_stackslot(cbuf, $primary, $src$$reg, $dst$$disp);
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{
  // RegSS -- Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );  // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);     // SIB byte
  emit_d32(cbuf, $dst$$disp);                 // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{
  // FILD [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{
  // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{
  // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack.
// When src is already FPR1 a plain store (0x02) is used; otherwise src is
// loaded first and the store also pops (0x03).
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack.
// Same store/store-and-pop selection as Pop_Mem_Reg_FPR.
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1;                  // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}

// Set up the FPU stack for a remainder: push dst, and if src is not already
// FPR1 exchange it into place underneath the new top of stack.
enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Reserve 8 bytes of stack and push src1, then src0, onto the FPU stack by
// bouncing each XMM double through the reserved slot.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding: reserves 4 bytes of stack.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the FPU top of stack into the XMM register dst through [rsp], then
// release 8 bytes of stack.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Float variant of Push_ResultD; the number of stack bytes released is the
// constant d8.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Reserve 8 bytes of stack and push the XMM double src onto the FPU stack.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Reserve an 8-byte temporary on the stack.
enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

// Release the 8-byte stack temporary.
enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Push the XMM double src onto the FPU stack through [rsp]; no stack space is
// reserved here.
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Undo the exchange done by Push_Reg_Mod_DPR when src is not FPR1.
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copy the FPU status word into the integer flags and emit a short JNP with
// displacement 5; the code it skips is emitted elsewhere.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

// fprem must be iterative: loop while the parity flag (taken from the FPU
// status word) is set.  The 32-bit branch displacement 0xFFFFFFF4 is -12, the
// length of this whole sequence.
enc_class emitModDPR() %{
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Move the FPU compare result into the integer flags, forcing the "less than"
// outcome when bit 0x0400 of the status word is set (unordered comparison).
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

//     fnstsw_ax();
//     sahf();
//     movl(dst, nan_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//
// Values as emitted by the encoding below:
//     less_result     = -1;
//     greater_result  =  1;
//     equal_result    =  0;
//     nan_result      = -1;
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8    ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}

// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // done:
%}

// Sign-extend an int into a long register pair: copy src into both halves,
// then shift the high half right arithmetically by 31.
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding  , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Push both halves of src, FILD the 64-bit value at the top of the stack,
// then release the 8 bytes that were pushed.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// Signed multiply into EDX:EAX, then shift EDX right arithmetically by
// cnt-32 when that amount is greater than zero.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL   EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR    EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL   EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL   EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD    EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// Long division through the runtime: push both operands (high half first),
// call SharedRuntime::ldiv, then drop the four pushed words.
// HIGH_FROM_LOW is applied to the register encoding, not to the opcode sum,
// matching convert_long_double.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Long remainder through the runtime: same shape as long_div, calling
// SharedRuntime::lrem.
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Set flags for "long == 0" by OR-ing both halves into tmp.
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Compare low halves and, only if they are equal, compare the high halves.
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// Full long compare: CMP on the low halves, then SBB of the high halves
// through tmp.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB   $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Compare zero against a long: tmp is cleared, compared with the low half,
// then SBB'd with the high half.
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(cbuf,0x33);  // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff... smells like Gnu Superoptimizer
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7);    // NEG hi
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7);    // NEG lo
  emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
  emit_opcode(cbuf,0x83);    // SBB hi,0
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8    (cbuf,0 );
%}

// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
// we inline both the stack-locking fast-path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
// another option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time.  These methods would accept arguments as
// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.
// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
//
// An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
// to those specialized methods.  That'd give us a mostly platform-independent
// implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
// to park() or unpark() threads.  We'd also need a few more unsafe operators
// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
// (b) explicit barriers or fence operations.
//
// TODO:
//
// *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
//    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
//    Given TLAB allocation, Self is usually manifested in a register, so passing it into
//    the lock operators would typically be faster than reifying Self.
//
// *  Ideally I'd define the primitives as:
//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
//    Unfortunately ADLC bugs prevent us from expressing the ideal form.
//    Instead, we're stuck with rather awkward and brittle register assignments below.
//    Furthermore the register assignments are overconstrained, possibly resulting in
//    sub-optimal code near the synchronization site.
//
// *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
//    Alternately, use a better sp-proximity test.
//
// *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
//    Either one is sufficient to uniquely identify a thread.
//    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
//
// *  Intrinsify notify() and notifyAll() for the common cases where the
//    object is locked by the calling thread but the waitlist is empty.
//    Avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
// // * use jccb and jmpb instead of jcc and jmp to improve code density. // But beware of excessive branch density on AMD Opterons. // // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success // or failure of the fast-path. If the fast-path fails then we pass // control to the slow-path, typically in C. In Fast_Lock and // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 // will emit a conditional branch immediately after the node. // So we have branches to branches and lots of ICC.ZF games. // Instead, it might be better to have C2 pass a "FailureLabel" // into Fast_Lock and Fast_Unlock. In the case of success, control // will drop through the node. ICC.ZF is undefined at exit. // In the case of failure, the node will branch directly to the // FailureLabel // obj: object to lock // box: on-stack box address (displaced header location) - KILLED // rax,: tmp -- KILLED // scr: tmp -- KILLED enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{ Register objReg = as_Register($obj$$reg); Register boxReg = as_Register($box$$reg); Register tmpReg = as_Register($tmp$$reg); Register scrReg = as_Register($scr$$reg); // Ensure the register assignents are disjoint guarantee (objReg != boxReg, "") ; guarantee (objReg != tmpReg, "") ; guarantee (objReg != scrReg, "") ; guarantee (boxReg != tmpReg, "") ; guarantee (boxReg != scrReg, "") ; guarantee (tmpReg == as_Register(EAX_enc), "") ; MacroAssembler masm(&cbuf); if (_counters != NULL) { masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); } if (EmitSync & 1) { // set box->dhw = unused_mark (3) // Force all sync thru slow-path: slow_enter() and slow_exit() masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ; masm.cmpptr (rsp, (int32_t)0) ; } else if (EmitSync & 2) { Label DONE_LABEL ; if (UseBiasedLocking) { // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); } masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword masm.orptr (tmpReg, 0x1); masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS if (os::is_MP()) { masm.lock(); } masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg masm.jcc(Assembler::equal, DONE_LABEL); // Recursive locking masm.subptr(tmpReg, rsp); masm.andptr(tmpReg, (int32_t) 0xFFFFF003 ); masm.movptr(Address(boxReg, 0), tmpReg); masm.bind(DONE_LABEL) ; } else { // Possible cases that we'll encounter in fast_lock // ------------------------------------------------ // * Inflated // -- unlocked // -- Locked // = by self // = by other // * biased // -- by Self // -- by other // * neutral // * stack-locked // -- by self // = sp-proximity test hits // = sp-proximity test generates false-negative // -- by other // Label IsInflated, DONE_LABEL, PopDone ; // TODO: optimize away redundant LDs of obj->mark and improve the markword triage // order to reduce the number of conditional branches in the most common cases. // Beware -- there's a subtle invariant that fetch of the markword // at [FETCH], below, will never observe a biased encoding (*101b). // If this invariant is not held we risk exclusion (safety) failure. if (UseBiasedLocking && !UseOptoBiasInlining) { masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); } masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral) masm.jccb (Assembler::notZero, IsInflated) ; // Attempt stack-locking ... 
masm.orptr (tmpReg, 0x1); masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS if (os::is_MP()) { masm.lock(); } masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg if (_counters != NULL) { masm.cond_inc32(Assembler::equal, ExternalAddress((address)_counters->fast_path_entry_count_addr())); } masm.jccb (Assembler::equal, DONE_LABEL); // Recursive locking masm.subptr(tmpReg, rsp); masm.andptr(tmpReg, 0xFFFFF003 ); masm.movptr(Address(boxReg, 0), tmpReg); if (_counters != NULL) { masm.cond_inc32(Assembler::equal, ExternalAddress((address)_counters->fast_path_entry_count_addr())); } masm.jmp (DONE_LABEL) ; masm.bind (IsInflated) ; // The object is inflated. // // TODO-FIXME: eliminate the ugly use of manifest constants: // Use markOopDesc::monitor_value instead of "2". // use markOop::unused_mark() instead of "3". // The tmpReg value is an objectMonitor reference ORed with // markOopDesc::monitor_value (2). We can either convert tmpReg to an // objectmonitor pointer by masking off the "2" bit or we can just // use tmpReg as an objectmonitor pointer but bias the objectmonitor // field offsets with "-2" to compensate for and annul the low-order tag bit. // // I use the latter as it avoids AGI stalls. // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". // #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) // boxReg refers to the on-stack BasicLock in the current frame. // We'd like to write: // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. // This is convenient but results a ST-before-CAS penalty. The following CAS suffers // additional latency as we have another ST in the store buffer that must drain. 
if (EmitSync & 8192) { masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty masm.get_thread (scrReg) ; masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov if (os::is_MP()) { masm.lock(); } masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; } else if ((EmitSync & 128) == 0) { // avoid ST-before-CAS masm.movptr(scrReg, boxReg) ; masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { // prefetchw [eax + Offset(_owner)-2] masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); } if ((EmitSync & 64) == 0) { // Optimistic form: consider XORL tmpReg,tmpReg masm.movptr(tmpReg, NULL_WORD) ; } else { // Can suffer RTS->RTO upgrades on shared or cold $ lines // Test-And-CAS instead of CAS masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner masm.testptr(tmpReg, tmpReg) ; // Locked ? masm.jccb (Assembler::notZero, DONE_LABEL) ; } // Appears unlocked - try to swing _owner from null to non-null. // Ideally, I'd manifest "Self" with get_thread and then attempt // to CAS the register containing Self into m->Owner. // But we don't have enough registers, so instead we can either try to CAS // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds // we later store "Self" into m->Owner. Transiently storing a stack address // (rsp or the address of the box) into m->owner is harmless. // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 
if (os::is_MP()) { masm.lock(); } masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3 masm.jccb (Assembler::notZero, DONE_LABEL) ; masm.get_thread (scrReg) ; // beware: clobbers ICCs masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success // If the CAS fails we can either retry or pass control to the slow-path. // We use the latter tactic. // Pass the CAS result in the icc.ZFlag into DONE_LABEL // If the CAS was successful ... // Self has acquired the lock // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. // Intentional fall-through into DONE_LABEL ... } else { masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty masm.movptr(boxReg, tmpReg) ; // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { // prefetchw [eax + Offset(_owner)-2] masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); } if ((EmitSync & 64) == 0) { // Optimistic form masm.xorptr (tmpReg, tmpReg) ; } else { // Can suffer RTS->RTO upgrades on shared or cold $ lines masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner masm.testptr(tmpReg, tmpReg) ; // Locked ? masm.jccb (Assembler::notZero, DONE_LABEL) ; } // Appears unlocked - try to swing _owner from null to non-null. // Use either "Self" (in scr) or rsp as thread identity in _owner. // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. masm.get_thread (scrReg) ; if (os::is_MP()) { masm.lock(); } masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // If the CAS fails we can either retry or pass control to the slow-path. // We use the latter tactic. 
// Pass the CAS result in the icc.ZFlag into DONE_LABEL // If the CAS was successful ... // Self has acquired the lock // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. // Intentional fall-through into DONE_LABEL ... } // DONE_LABEL is a hot target - we'd really like to place it at the // start of cache line by padding with NOPs. // See the AMD and Intel software optimization manuals for the // most efficient "long" NOP encodings. // Unfortunately none of our alignment mechanisms suffice. masm.bind(DONE_LABEL); // Avoid branch-to-branch on AMD processors // This appears to be superstition. if (EmitSync & 32) masm.nop() ; // At DONE_LABEL the icc ZFlag is set as follows ... // Fast_Unlock uses the same protocol. // ZFlag == 1 -> Success // ZFlag == 0 -> Failure - force control through the slow-path } %} // obj: object to unlock // box: box address (displaced header location), killed. Must be EAX. // rbx,: killed tmp; cannot be obj nor box. // // Some commentary on balanced locking: // // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. // Methods that don't have provably balanced locking are forced to run in the // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. // The interpreter provides two properties: // I1: At return-time the interpreter automatically and quietly unlocks any // objects acquired the current activation (frame). Recall that the // interpreter maintains an on-stack list of locks currently held by // a frame. // I2: If a method attempts to unlock an object that is not held by the // the frame the interpreter throws IMSX. // // Lets say A(), which has provably balanced locking, acquires O and then calls B(). // B() doesn't have provably balanced locking so it runs in the interpreter. // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O // is still locked by A(). // // The only other source of unbalanced locking would be JNI. 
// The "Java Native Interface: Programmer's Guide and Specification" claims
// that an object locked by jni_monitorenter should not be unlocked by
// "normal" java-level locking and vice-versa.  The specification doesn't
// specify what will occur if a program engages in such mixed-mode locking,
// however.

// Fast_Unlock: emit the inline monitor-exit sequence.
//
//   obj - register holding the object being unlocked
//   box - register holding the address of the on-stack lock box; the
//         guarantee() below insists that it is EAX
//   tmp - scratch register; must differ from both obj and box
//
// Result protocol: the emitted code leaves its result in the condition codes.
// ZF == 1 means the fast path succeeded, ZF == 0 forces control through the
// slow path.
//
// EmitSync selects the code shape:
//   EmitSync & 4 : emit only "cmp rsp, 0" (inlining disabled, always slow path)
//   EmitSync & 8 : classic stack-locking exit only
//   otherwise    : full version handling recursive stack-locks, inflated
//                  monitors and plain stack-locks
enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{

  Register objReg = as_Register($obj$$reg);
  Register boxReg = as_Register($box$$reg);
  Register tmpReg = as_Register($tmp$$reg);

  // The three registers must be pairwise distinct, and box must be EAX
  // because EAX is the implicit comparand of CMPXCHG.
  guarantee (objReg != boxReg, "");
  guarantee (objReg != tmpReg, "");
  guarantee (boxReg != tmpReg, "");
  guarantee (boxReg == as_Register(EAX_enc), "");
  MacroAssembler masm(&cbuf);

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
    masm.cmpptr (rsp, 0);
  } else
  if (EmitSync & 8) {
    Label DONE_LABEL;
    if (UseBiasedLocking) {
      masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
    // classic stack-locking code ...
    masm.movptr(tmpReg, Address(boxReg, 0));
    masm.testptr(tmpReg, tmpReg);
    masm.jcc (Assembler::zero, DONE_LABEL);
    if (os::is_MP()) { masm.lock(); }
    masm.cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses EAX which is box
    masm.bind(DONE_LABEL);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc, Inflated;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

    masm.cmpptr(Address(boxReg, 0), 0);            // Examine the displaced header
    masm.movptr(tmpReg, Address(objReg, 0));       // Examine the object's markword
    masm.jccb (Assembler::zero, DONE_LABEL);       // 0 indicates recursive stack-lock

    masm.testptr(tmpReg, 0x02);                    // Inflated?
    masm.jccb (Assembler::zero, Stacked);

    masm.bind (Inflated);
    // It's inflated.
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit.  That is, exit without
    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock.  Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.

    masm.get_thread (boxReg);
    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
      // prefetchw [ebx + Offset(_owner)-2]
      // NOTE(review): the address uses rbx, but the tagged monitor pointer was
      // loaded into tmpReg above and nothing in this block forces tmpReg to be
      // rbx -- confirm against the operand class used by the matching instruct.
      masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
    }

    // Note that we could employ various encoding schemes to reduce
    // the number of loads below (currently 4) to just 2 or 3.
    // Refer to the comments in synchronizer.cpp.
    // In practice the chain of fetches doesn't seem to impact performance, however.
    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
      // Attempt to reduce branch density - AMD's branch predictor.
      // Fold owner^Self, recursions, EntryList and cxq into one value and
      // branch once; any non-zero result goes to DONE_LABEL with ZF == 0.
      masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
      masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
      masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
      masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
      masm.jccb (Assembler::notZero, DONE_LABEL);
      masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
      masm.jmpb (DONE_LABEL);
    } else {
      // Not the owner, or recursively held: slow path (ZF == 0).
      masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
      masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
      masm.jccb (Assembler::notZero, DONE_LABEL);
      // Waiters present (EntryList or cxq non-null): go to CheckSucc.
      masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
      masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
      masm.jccb (Assembler::notZero, CheckSucc);
      // Uncontended: release by storing null into _owner (ZF is still 1).
      masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
      masm.jmpb (DONE_LABEL);
    }

    // The following code fragment (EmitSync & 65536) improves the performance of
    // contended applications and contended synchronization microbenchmarks.
    // Unfortunately the emission of the code - even though not executed - causes regressions
    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
    // with an equal number of never-executed NOPs results in the same regression.
    // We leave it off by default.

    if ((EmitSync & 65536) != 0) {
      Label LSuccess, LGoSlowPath;

      masm.bind (CheckSucc);

      // Optional pre-test ... it's safe to elide this
      if ((EmitSync & 16) == 0) {
        masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
        masm.jccb (Assembler::zero, LGoSlowPath);
      }

      // We have a classic Dekker-style idiom:
      //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
      // There are a number of ways to implement the barrier:
      // (1) lock:andl &m->_owner, 0
      //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
      //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
      //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
      // (2) If supported, an explicit MFENCE is appealing.
      //     In older IA32 processors MFENCE is slower than lock:add or xchg
      //     particularly if the write-buffer is full as might be the case
      //     if stores closely precede the fence or fence-equivalent instruction.
      //     In more modern implementations MFENCE appears faster, however.
      // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
      //     The $lines underlying the top-of-stack should be in M-state.
      //     The locked add instruction is serializing, of course.
      // (4) Use xchg, which is serializing
      //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
      // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
      //     The integer condition codes will tell us if succ was 0.
      //     Since _succ and _owner should reside in the same $line and
      //     we just stored into _owner, it's likely that the $line
      //     remains in M-state for the lock:orl.
      //
      // We currently use (3), although it's likely that switching to (2)
      // is correct for the future.
      // (The code below emits (2) instead when SSE2 is available and
      // FenceInstruction == 1.)

      masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
      if (os::is_MP()) {
        if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
          masm.mfence();
        } else {
          masm.lock (); masm.addptr(Address(rsp, 0), 0);
        }
      }
      // Ratify _succ remains non-null
      masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
      masm.jccb (Assembler::notZero, LSuccess);

      masm.xorptr(boxReg, boxReg);                 // box is really EAX
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
      masm.jccb (Assembler::notEqual, LSuccess);
      // Since we're low on registers we installed rsp as a placeholder in _owner.
      // Now install Self over rsp.  This is safe as we're transitioning from
      // non-null to non-null
      masm.get_thread (boxReg);
      masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
      // Intentional fall-through into LGoSlowPath ...

      masm.bind (LGoSlowPath);
      masm.orptr(boxReg, 1);                       // set ICC.ZF=0 to indicate failure
      masm.jmpb (DONE_LABEL);

      masm.bind (LSuccess);
      masm.xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
      masm.jmpb (DONE_LABEL);
    }

    masm.bind (Stacked);
    // It's not inflated and it's not recursively stack-locked and it's not biased.
    // It must be stack-locked.
    // Try to reset the header to displaced header.
    // The "box" value on the stack is stable, so we can reload
    // and be assured we observe the same value as above.
    masm.movptr(tmpReg, Address(boxReg, 0));
    if (os::is_MP()) { masm.lock(); }
    masm.cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses EAX which is box
    // Intentional fall-thru into DONE_LABEL

    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    if ((EmitSync & 65536) == 0) {
      // The CheckSucc fragment was not emitted above, so bind the label here;
      // jumps to it then land directly on DONE_LABEL.
      masm.bind (CheckSucc);
    }
    masm.bind(DONE_LABEL);

    // Avoid branch to branch on AMD processors
    if (EmitSync & 32768) { masm.nop(); }
  }
%}

// Emit "POP EDX" (single-byte opcode 0x5A).
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}

// Emit a relocatable near JMP (E9 rel32) to OptoRuntime::rethrow_stub().
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);        // jmp    entry
  // rel32 is measured from the end of the 4-byte displacement, hence the -4.
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}


// Convert a double to an int.  Java semantics require we do complex
// manglelations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
// DPR2I_encoding: x87 double -> int conversion with truncation.
// Emits: FLDCW trunc; SUB ESP,4; FISTP dword [ESP]; FLDCW std; POP EAX;
// then compares EAX with 0x80000000 and, only when equal, reloads the source
// and calls StubRoutines::d2i_wrapper() to compute the corner-case result.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // throw an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);            // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2 bytes) + CALL (5 bytes)
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// DPR2L_encoding: x87 double -> long conversion with truncation.
// Same shape as DPR2I_encoding, but stores a 64-bit integer and pops it into
// EDX:EAX.  The slow call to StubRoutines::d2l_wrapper() is taken only when
// EDX == 0x80000000 and EAX == 0.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate two words (8 bytes) for the long result
  emit_opcode(cbuf,0x83);            // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);            // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted long; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST/JNE pair below (4 bytes)
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // ModRM: mod=11b, reg=EAX, r/m=EAX
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Emit FMUL ST,ST(i) for the x87 register given by src1.
enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

// Emit FADD ST,ST(i) (non-popping form) for the x87 register given by src2.
enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  // could use FADDP  src2,fpST  /* DE C0+i */
%}

// Emit FADDP ST(i),ST (add and pop) for the x87 register given by src2.
enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

// Emit FSUB ST,src1 followed by FDIV ST,src2.
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1  /* D8 E0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV   ST,$src2  /* D8 F0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

// Emit FADD ST,src1 followed by FMUL ST,src2.
enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Emit FADD ST,src1 followed by FMULP src2,ST (multiply and pop).
enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long
// Emits DF /5 (FILD m64) from $mem, then DF /7 (FISTP m64) into the
// destination stack slot, so the value moves as one 64-bit access.
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// Emits DF /5 (FILD m64) from the source stack slot, then DF /7 (FISTP m64)
// to $mem.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);    // mod=00b, reg=7 (EDI), r/m=101b (disp32 only)
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}

// Closes the enclosing block that was opened before this section
// (presumably "encode %{" -- confirm against the start of the file).
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Tables are indexed by ideal register type; hi is the upper half of the pair.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    // Doubles use XMM0 when UseSSE >= 2.  Floats use XMM0 already when
    // UseSSE >= 1, unlike c_return_value, which requires UseSSE >= 2.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.
// If > 1, a compute_padding() function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed 16-bit value
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate in the range 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate in the range 32..63
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 1 (same predicate as immI1)
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value survives a round trip through int
// (i.e. it fits in a signed 32-bit integer)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE <= 1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE >= 2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  // NOTE(review): the predicate compares jlong_cast(value) with 0, presumably
  // a raw bit-pattern test that rejects both NaN and -0.0 -- confirm
  // jlong_cast's definition.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE >= 1)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constant 16
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant 24
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register allocated from the nax_reg class
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the nadx_reg class
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the ncx_reg class
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null
// tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the nax_reg class
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the nabx_reg class
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}

// Long Register (register pair)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);
  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);
  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);
  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): this operand is never selected by the RegFlags match
// rule on its own.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);
  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (double, used when UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);
  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (integer register base, pointer-immediate displacement)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);
  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand (no displacement)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);
  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// //
// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));
  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);
  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.
// Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These operands take an eRegP_no_EBP base and carry op_cost(100).

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);
  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);
  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);
  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));
  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);
  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.
// The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (restricted by the predicate to lt, ge, le and gt tests)
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Floating comparisons that can be fixed up with extra conditional jumps
// (restricted by the predicate to ne and eq tests)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
  %}
%}

// Comparison Code used in long compares.
// Relative to cmpOp, the encodings of less/greater and of
// less_equal/greater_equal are swapped; equal and not_equal are unchanged.
operand cmpOp_commute() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.
// The classic
// case of this is memory operands.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions vary in size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction unit size is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming
// convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): the second parameter is declared as 'memory src' although
// the name and the comment say reg-reg -- confirm which is intended.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;        // any mem
%}

// Long ALU reg-mem
// operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem    : S3(read);
  src    : S5(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
  instruction_count(2);
  mem    : S3(read);
  src    : S5(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // Big decoder only
  ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU0   : S4;        // ALU0 only
  MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y      : S4(read);
  q      : S3(read);
  p      : S3(read);
  DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
  MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);     // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);     // any 3 decoders (NOTE(review): the count here is 4 -- confirm)
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);     // any 3 decoders
  D0     : S0;        // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
  instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr     : S1(read);
  BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1);
  force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(100);
  D0     : S0(2);
  MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Byte-swap the full 32-bit register, then shift right logically by 16.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-swap the full 32-bit register, then shift right arithmetically by 16.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Variant selected when UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // Taken only when the JNZ falls through: -1 makes the NEG/ADD below
    // produce 1 + (BitsPerInt - 1).
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // Count in the high word first; the low word is only examined when the
    // carry flag is set after the first LZCNT.
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Variant selected when UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Both BSRs reported zero: -1 makes the NEG/ADD below produce
    // 1 + (BitsPerLong - 1).
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    // JNZ fell through: the result is BitsPerInt.
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Neither BSF found a bit: BitsPerInt here plus the ADD below
    // gives 2 * BitsPerInt.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// The two 32-bit halves are counted separately (the high half into tmp)
// and the counts are added.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an
int.instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL (LoadL mem))); effect(KILL cr, TEMP tmp, TEMP dst); format %{ "POPCNT $dst, $mem\n\t" "POPCNT $tmp, $mem+4\n\t" "ADD $dst, $tmp" %} ins_encode %{ //__ popcntl($dst$$Register, $mem$$Address$$first); //__ popcntl($tmp$$Register, $mem$$Address$$second); __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); __ addl($dst$$Register, $tmp$$Register); %} ins_pipe(ialu_reg);%}//----------Load/Store/Move Instructions---------------------------------------//----------Load Instructions--------------------------------------------------// Load Byte (8bit signed)instruct loadB(xRegI dst, memory mem) %{ match(Set dst (LoadB mem)); ins_cost(125); format %{ "MOVSX8 $dst,$mem\t# byte" %} ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Byte (8bit signed) into Long Registerinstruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ match(Set dst (ConvI2L (LoadB mem))); effect(KILL cr); ins_cost(375); format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" "MOV $dst.hi,$dst.lo\n\t" "SAR $dst.hi,7" %} ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 
%} ins_pipe(ialu_reg_mem);%}// Load Unsigned Byte (8bit UNsigned)instruct loadUB(xRegI dst, memory mem) %{ match(Set dst (LoadUB mem)); ins_cost(125); format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} ins_encode %{ __ movzbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Byte (8 bit UNsigned) into Long Registerinstruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ match(Set dst (ConvI2L (LoadUB mem))); effect(KILL cr); ins_cost(250); format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" "XOR $dst.hi,$dst.hi" %} ins_encode %{ Register Rdst = $dst$$Register; __ movzbl(Rdst, $mem$$Address); __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Byte (8 bit UNsigned) with mask into Long Registerinstruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{ match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); effect(KILL cr); format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t" "XOR $dst.hi,$dst.hi\n\t" "AND $dst.lo,$mask" %} ins_encode %{ Register Rdst = $dst$$Register; __ movzbl(Rdst, $mem$$Address); __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); __ andl(Rdst, $mask$$constant); %} ins_pipe(ialu_reg_mem);%}// Load Short (16bit signed)instruct loadS(rRegI dst, memory mem) %{ match(Set dst (LoadS mem)); ins_cost(125); format %{ "MOVSX $dst,$mem\t# short" %} ins_encode %{ __ movswl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Short (16 bit signed) to Byte (8 bit signed)instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ins_cost(125); format %{ "MOVSX $dst, $mem\t# short -> byte" %} ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Short (16bit signed) into Long Registerinstruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ match(Set dst (ConvI2L (LoadS mem))); effect(KILL cr); ins_cost(375); format %{ "MOVSX $dst.lo,$mem\t# 
short -> long\n\t" "MOV $dst.hi,$dst.lo\n\t" "SAR $dst.hi,15" %} ins_encode %{ __ movswl($dst$$Register, $mem$$Address); __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Short/Char (16bit unsigned)instruct loadUS(rRegI dst, memory mem) %{ match(Set dst (LoadUS mem)); ins_cost(125); format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} ins_encode %{ __ movzwl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); ins_cost(125); format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Short/Char (16 bit UNsigned) into Long Registerinstruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ match(Set dst (ConvI2L (LoadUS mem))); effect(KILL cr); ins_cost(250); format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" "XOR $dst.hi,$dst.hi" %} ins_encode %{ __ movzwl($dst$$Register, $mem$$Address); __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Registerinstruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); effect(KILL cr); format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" "XOR $dst.hi,$dst.hi" %} ins_encode %{ Register Rdst = $dst$$Register; __ movzbl(Rdst, $mem$$Address); __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); %} ins_pipe(ialu_reg_mem);%}// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Registerinstruct loadUS2L_immI16(eRegL dst, memory mem, immI16 
mask, eFlagsReg cr) %{ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); effect(KILL cr); format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t" "XOR $dst.hi,$dst.hi\n\t" "AND $dst.lo,$mask" %} ins_encode %{ Register Rdst = $dst$$Register; __ movzwl(Rdst, $mem$$Address); __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); __ andl(Rdst, $mask$$constant); %} ins_pipe(ialu_reg_mem);%}// Load Integerinstruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ins_cost(125); format %{ "MOV $dst,$mem\t# int" %} ins_encode %{ __ movl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Integer (32 bit signed) to Byte (8 bit signed)instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ins_cost(125); format %{ "MOVSX $dst, $mem\t# int -> byte" %} ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} ins_encode %{ __ movzbl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Integer (32 bit signed) to Short (16 bit signed)instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ins_cost(125); format %{ "MOVSX $dst, $mem\t# int -> short" %} ins_encode %{ __ movswl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} ins_encode %{ __ movzwl($dst$$Register, $mem$$Address); %} ins_pipe(ialu_reg_mem);%}// Load Integer into Long Registerinstruct loadI2L(eRegL dst, memory mem, eFlagsReg 
cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// Only the low byte survives the mask, so a zero-extending byte load plus a
// cleared high word is sufficient.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// Only the low 16 bits survive the mask, so a zero-extending word load plus a
// cleared high word is sufficient.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with a non-negative 32-bit mask into Long Register
//
// The encoding clears the high word with XOR instead of sign-extending the
// low word.  That is only equal to ConvI2L(AndI(LoadI mem, mask)) when the
// AND result can never be negative, i.e. when the mask's sign bit is clear.
// With a negative mask and a negative value in memory the correct long has
// an all-ones high word, so this rule must not be selected in that case.
// The predicate below rejects such masks.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  // n is the ConvI2L root of the matched tree: in(1) is the AndI and
  // in(1)->in(2) is its constant mask operand.
  predicate(n->in(1)->in(2)->get_int() >= 0);
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint
-> long\n\t" "XOR $dst.hi,$dst.hi" %} ins_encode %{ __ movl($dst$$Register, $mem$$Address); __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); %} ins_pipe(ialu_reg_mem);%}// Load Long. Cannot clobber address while loading, so restrict address// register to ESIinstruct loadL(eRegL dst, load_long_memory mem) %{ predicate(!((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); ins_cost(250); format %{ "MOV $dst.lo,$mem\t# long\n\t" "MOV $dst.hi,$mem+4" %} ins_encode %{ Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); __ movl($dst$$Register, Amemlo); __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); %} ins_pipe(ialu_reg_long_mem);%}// Volatile Load Long. Must be atomic, so do 64-bit FILD// then store it down to the stack and reload on the int// side.instruct loadL_volatile(stackSlotL dst, memory mem) %{ predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); ins_cost(200); format %{ "FILD $mem\t# Atomic volatile long load\n\t" "FISTp $dst" %} ins_encode(enc_loadL_volatile(mem,dst)); ins_pipe( fpu_reg_mem );%}instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); effect(TEMP tmp); ins_cost(180); format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" "MOVSD $dst,$tmp" %} ins_encode %{ __ movdbl($tmp$$XMMRegister, $mem$$Address); __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); effect(TEMP tmp); ins_cost(160); format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" "MOVD $dst.lo,$tmp\n\t" "PSRLQ $tmp,32\n\t" "MOVD $dst.hi,$tmp" %} 
ins_encode %{ __ movdbl($tmp$$XMMRegister, $mem$$Address); __ movdl($dst$$Register, $tmp$$XMMRegister); __ psrlq($tmp$$XMMRegister, 32); __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}// Load Rangeinstruct loadRange(rRegI dst, memory mem) %{ match(Set dst (LoadRange mem)); ins_cost(125); format %{ "MOV $dst,$mem" %} opcode(0x8B); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_mem );%}// Load Pointerinstruct loadP(eRegP dst, memory mem) %{ match(Set dst (LoadP mem)); ins_cost(125); format %{ "MOV $dst,$mem" %} opcode(0x8B); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_mem );%}// Load Klass Pointerinstruct loadKlass(eRegP dst, memory mem) %{ match(Set dst (LoadKlass mem)); ins_cost(125); format %{ "MOV $dst,$mem" %} opcode(0x8B); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_mem );%}// Load Doubleinstruct loadDPR(regDPR dst, memory mem) %{ predicate(UseSSE<=1); match(Set dst (LoadD mem)); ins_cost(150); format %{ "FLD_D ST,$mem\n\t" "FSTP $dst" %} opcode(0xDD); /* DD /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem );%}// Load Double to XMMinstruct loadD(regD dst, memory mem) %{ predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); match(Set dst (LoadD mem)); ins_cost(145); format %{ "MOVSD $dst,$mem" %} ins_encode %{ __ movdbl ($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow );%}instruct loadD_partial(regD dst, memory mem) %{ predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); match(Set dst (LoadD mem)); ins_cost(145); format %{ "MOVLPD $dst,$mem" %} ins_encode %{ __ movdbl ($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow );%}// Load to XMM register (single-precision floating point)// MOVSS instructioninstruct loadF(regF dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (LoadF mem)); ins_cost(145); format %{ "MOVSS $dst,$mem" %} ins_encode %{ __ movflt ($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow );%}// Load Floatinstruct 
loadFPR(regFPR dst, memory mem) %{ predicate(UseSSE==0); match(Set dst (LoadF mem)); ins_cost(150); format %{ "FLD_S ST,$mem\n\t" "FSTP $dst" %} opcode(0xD9); /* D9 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem );%}// Load Effective Addressinstruct leaP8(eRegP dst, indOffset8 mem) %{ match(Set dst mem); ins_cost(110); format %{ "LEA $dst,$mem" %} opcode(0x8D); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_reg_fat );%}instruct leaP32(eRegP dst, indOffset32 mem) %{ match(Set dst mem); ins_cost(110); format %{ "LEA $dst,$mem" %} opcode(0x8D); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_reg_fat );%}instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ match(Set dst mem); ins_cost(110); format %{ "LEA $dst,$mem" %} opcode(0x8D); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_reg_fat );%}instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ match(Set dst mem); ins_cost(110); format %{ "LEA $dst,$mem" %} opcode(0x8D); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_reg_fat );%}instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ match(Set dst mem); ins_cost(110); format %{ "LEA $dst,$mem" %} opcode(0x8D); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_reg_fat );%}// Load Constantinstruct loadConI(rRegI dst, immI src) %{ match(Set dst src); format %{ "MOV $dst,$src" %} ins_encode( LdImmI(dst, src) ); ins_pipe( ialu_reg_fat );%}// Load Constant zeroinstruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); ins_cost(50); format %{ "XOR $dst,$dst" %} opcode(0x33); /* + rd */ ins_encode( OpcP, RegReg( dst, dst ) ); ins_pipe( ialu_reg );%}instruct loadConP(eRegP dst, immP src) %{ match(Set dst src); format %{ "MOV $dst,$src" %} opcode(0xB8); /* + rd */ ins_encode( LdImmP(dst, src) ); ins_pipe( ialu_reg_fat );%}instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); ins_cost(200); format %{ "MOV 
$dst.lo,$src.lo\n\t" "MOV $dst.hi,$src.hi" %} opcode(0xB8); ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); ins_pipe( ialu_reg_long_fat );%}instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); ins_cost(150); format %{ "XOR $dst.lo,$dst.lo\n\t" "XOR $dst.hi,$dst.hi" %} opcode(0x33,0x33); ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); ins_pipe( ialu_reg_long );%}// The instruction usage is guarded by predicate in operand immFPR().instruct loadConFPR(regFPR dst, immFPR con) %{ match(Set dst con); ins_cost(125); format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" "FSTP $dst" %} ins_encode %{ __ fld_s($constantaddress($con)); __ fstp_d($dst$$reg); %} ins_pipe(fpu_reg_con);%}// The instruction usage is guarded by predicate in operand immFPR0().instruct loadConFPR0(regFPR dst, immFPR0 con) %{ match(Set dst con); ins_cost(125); format %{ "FLDZ ST\n\t" "FSTP $dst" %} ins_encode %{ __ fldz(); __ fstp_d($dst$$reg); %} ins_pipe(fpu_reg_con);%}// The instruction usage is guarded by predicate in operand immFPR1().instruct loadConFPR1(regFPR dst, immFPR1 con) %{ match(Set dst con); ins_cost(125); format %{ "FLD1 ST\n\t" "FSTP $dst" %} ins_encode %{ __ fld1(); __ fstp_d($dst$$reg); %} ins_pipe(fpu_reg_con);%}// The instruction usage is guarded by predicate in operand immF().instruct loadConF(regF dst, immF con) %{ match(Set dst con); ins_cost(125); format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} ins_encode %{ __ movflt($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow);%}// The instruction usage is guarded by predicate in operand immF0().instruct loadConF0(regF dst, immF0 src) %{ match(Set dst src); ins_cost(100); format %{ "XORPS $dst,$dst\t# float 0.0" %} ins_encode %{ __ xorps($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe(pipe_slow);%}// The instruction usage is guarded by predicate in operand immDPR().instruct loadConDPR(regDPR 
dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long Stack Slot into a register pair.
// Two 32-bit loads (opcode 0x8B each): RegMem handles the first word and
// RegMem_Hi the second.  The format text names the register halves and the
// two stack words explicitly, in the same style as loadL, because dst is the
// register pair and src is the memory operand here.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
ins_encode( OpcP, RegMem(dst,src)); ins_pipe( ialu_reg_mem );%}// Load Stack Slotinstruct loadSSF(regFPR dst, stackSlotF src) %{ match(Set dst src); ins_cost(125); format %{ "FLD_S $src\n\t" "FSTP $dst" %} opcode(0xD9); /* D9 /0, FLD m32real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem );%}// Load Stack Slotinstruct loadSSD(regDPR dst, stackSlotD src) %{ match(Set dst src); ins_cost(125); format %{ "FLD_D $src\n\t" "FSTP $dst" %} opcode(0xDD); /* DD /0, FLD m64real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem );%}// Prefetch instructions.// Must be safe to execute with invalid address (cannot fault).instruct prefetchr0( memory mem ) %{ predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); match(PrefetchRead mem); ins_cost(0); size(0); format %{ "PREFETCHR (non-SSE is empty encoding)" %} ins_encode(); ins_pipe(empty);%}instruct prefetchr( memory mem ) %{ predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3); match(PrefetchRead mem); ins_cost(100); format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %} ins_encode %{ __ prefetchr($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchrNTA( memory mem ) %{ predicate(UseSSE>=1 && ReadPrefetchInstr==0); match(PrefetchRead mem); ins_cost(100); format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %} ins_encode %{ __ prefetchnta($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchrT0( memory mem ) %{ predicate(UseSSE>=1 && ReadPrefetchInstr==1); match(PrefetchRead mem); ins_cost(100); format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %} ins_encode %{ __ prefetcht0($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchrT2( memory mem ) %{ predicate(UseSSE>=1 && ReadPrefetchInstr==2); match(PrefetchRead mem); ins_cost(100); format %{ "PREFETCHT2 $mem\t! 
Prefetch into L2 cache for read" %} ins_encode %{ __ prefetcht2($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchw0( memory mem ) %{ predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); match(PrefetchWrite mem); ins_cost(0); size(0); format %{ "Prefetch (non-SSE is empty encoding)" %} ins_encode(); ins_pipe(empty);%}instruct prefetchw( memory mem ) %{ predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch()); match( PrefetchWrite mem ); ins_cost(100); format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %} ins_encode %{ __ prefetchw($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchwNTA( memory mem ) %{ predicate(UseSSE>=1); match(PrefetchWrite mem); ins_cost(100); format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %} ins_encode %{ __ prefetchnta($mem$$Address); %} ins_pipe(ialu_mem);%}// Prefetch instructions for allocation.instruct prefetchAlloc0( memory mem ) %{ predicate(UseSSE==0 && AllocatePrefetchInstr!=3); match(PrefetchAllocation mem); ins_cost(0); size(0); format %{ "Prefetch allocation (non-SSE is empty encoding)" %} ins_encode(); ins_pipe(empty);%}instruct prefetchAlloc( memory mem ) %{ predicate(AllocatePrefetchInstr==3); match( PrefetchAllocation mem ); ins_cost(100); format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} ins_encode %{ __ prefetchw($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchAllocNTA( memory mem ) %{ predicate(UseSSE>=1 && AllocatePrefetchInstr==0); match(PrefetchAllocation mem); ins_cost(100); format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} ins_encode %{ __ prefetchnta($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchAllocT0( memory mem ) %{ predicate(UseSSE>=1 && AllocatePrefetchInstr==1); match(PrefetchAllocation mem); ins_cost(100); format %{ "PREFETCHT0 $mem\t! 
Prefetch allocation into L1 and L2 caches for write" %} ins_encode %{ __ prefetcht0($mem$$Address); %} ins_pipe(ialu_mem);%}instruct prefetchAllocT2( memory mem ) %{ predicate(UseSSE>=1 && AllocatePrefetchInstr==2); match(PrefetchAllocation mem); ins_cost(100); format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %} ins_encode %{ __ prefetcht2($mem$$Address); %} ins_pipe(ialu_mem);%}//----------Store Instructions-------------------------------------------------// Store Byteinstruct storeB(memory mem, xRegI src) %{ match(Set mem (StoreB mem src)); ins_cost(125); format %{ "MOV8 $mem,$src" %} opcode(0x88); ins_encode( OpcP, RegMem( src, mem ) ); ins_pipe( ialu_mem_reg );%}// Store Char/Shortinstruct storeC(memory mem, rRegI src) %{ match(Set mem (StoreC mem src)); ins_cost(125); format %{ "MOV16 $mem,$src" %} opcode(0x89, 0x66); ins_encode( OpcS, OpcP, RegMem( src, mem ) ); ins_pipe( ialu_mem_reg );%}// Store Integerinstruct storeI(memory mem, rRegI src) %{ match(Set mem (StoreI mem src)); ins_cost(125); format %{ "MOV $mem,$src" %} opcode(0x89); ins_encode( OpcP, RegMem( src, mem ) ); ins_pipe( ialu_mem_reg );%}// Store Longinstruct storeL(long_memory mem, eRegL src) %{ predicate(!((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); ins_cost(200); format %{ "MOV $mem,$src.lo\n\t" "MOV $mem+4,$src.hi" %} opcode(0x89, 0x89); ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); ins_pipe( ialu_mem_long_reg );%}// Store Long to Integerinstruct storeL2I(memory mem, eRegL src) %{ match(Set mem (StoreI mem (ConvL2I src))); format %{ "MOV $mem,$src.lo\t# long -> int" %} ins_encode %{ __ movl($mem$$Address, $src$$Register); %} ins_pipe(ialu_mem_reg);%}// Volatile Store Long. Must be atomic, so move it into// the FP TOS and then do a 64-bit FIST. 
Has to probe the// target address before the store (for null-ptr checks)// so the memory operand is used twice in the encoding.instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); effect( KILL cr ); ins_cost(400); format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" "FILD $src\n\t" "FISTp $mem\t # 64-bit atomic volatile long store" %} opcode(0x3B); ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); ins_pipe( fpu_reg_mem );%}instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); effect( TEMP tmp, KILL cr ); ins_cost(380); format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" "MOVSD $tmp,$src\n\t" "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} ins_encode %{ __ cmpl(rax, $mem$$Address); __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); __ movdbl($mem$$Address, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); effect( TEMP tmp2 , TEMP tmp, KILL cr ); ins_cost(360); format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" "MOVD $tmp,$src.lo\n\t" "MOVD $tmp2,$src.hi\n\t" "PUNPCKLDQ $tmp,$tmp2\n\t" "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} ins_encode %{ __ cmpl(rax, $mem$$Address); __ movdl($tmp$$XMMRegister, $src$$Register); __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdbl($mem$$Address, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}// Store Pointer; for storing unknown oops and raw pointersinstruct storeP(memory mem, anyRegP src) %{ match(Set mem (StoreP mem src)); 
ins_cost(125); format %{ "MOV $mem,$src" %} opcode(0x89); ins_encode( OpcP, RegMem( src, mem ) ); ins_pipe( ialu_mem_reg );%}// Store Integer Immediateinstruct storeImmI(memory mem, immI src) %{ match(Set mem (StoreI mem src)); ins_cost(150); format %{ "MOV $mem,$src" %} opcode(0xC7); /* C7 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); ins_pipe( ialu_mem_imm );%}// Store Short/Char Immediateinstruct storeImmI16(memory mem, immI16 src) %{ predicate(UseStoreImmI16); match(Set mem (StoreC mem src)); ins_cost(150); format %{ "MOV16 $mem,$src" %} opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); ins_pipe( ialu_mem_imm );%}// Store Pointer Immediate; null pointers or constant oops that do not// need card-mark barriers.instruct storeImmP(memory mem, immP src) %{ match(Set mem (StoreP mem src)); ins_cost(150); format %{ "MOV $mem,$src" %} opcode(0xC7); /* C7 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); ins_pipe( ialu_mem_imm );%}// Store Byte Immediateinstruct storeImmB(memory mem, immI8 src) %{ match(Set mem (StoreB mem src)); ins_cost(150); format %{ "MOV8 $mem,$src" %} opcode(0xC6); /* C6 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); ins_pipe( ialu_mem_imm );%}// Store CMS card-mark Immediateinstruct storeImmCM(memory mem, immI8 src) %{ match(Set mem (StoreCM mem src)); ins_cost(150); format %{ "MOV8 $mem,$src\t! 
CMS card-mark imm0" %} opcode(0xC6); /* C6 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); ins_pipe( ialu_mem_imm );%}// Store Doubleinstruct storeDPR( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem (StoreD mem src)); ins_cost(100); format %{ "FST_D $mem,$src" %} opcode(0xDD); /* DD /2 */ ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg );%}// Store double does rounding on x86instruct storeDPR_rounded( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem (StoreD mem (RoundDouble src))); ins_cost(100); format %{ "FST_D $mem,$src\t# round" %} opcode(0xDD); /* DD /2 */ ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg );%}// Store XMM register to memory (double-precision floating points)// MOVSD instructioninstruct storeD(memory mem, regD src) %{ predicate(UseSSE>=2); match(Set mem (StoreD mem src)); ins_cost(95); format %{ "MOVSD $mem,$src" %} ins_encode %{ __ movdbl($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow );%}// Store XMM register to memory (single-precision floating point)// MOVSS instructioninstruct storeF(memory mem, regF src) %{ predicate(UseSSE>=1); match(Set mem (StoreF mem src)); ins_cost(95); format %{ "MOVSS $mem,$src" %} ins_encode %{ __ movflt($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow );%}// Store Floatinstruct storeFPR( memory mem, regFPR1 src) %{ predicate(UseSSE==0); match(Set mem (StoreF mem src)); ins_cost(100); format %{ "FST_S $mem,$src" %} opcode(0xD9); /* D9 /2 */ ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg );%}// Store Float does rounding on x86instruct storeFPR_rounded( memory mem, regFPR1 src) %{ predicate(UseSSE==0); match(Set mem (StoreF mem (RoundFloat src))); ins_cost(100); format %{ "FST_S $mem,$src\t# round" %} opcode(0xD9); /* D9 /2 */ ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg );%}// Store Float does rounding on x86instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem 
(StoreF mem (ConvD2F src))); ins_cost(100); format %{ "FST_S $mem,$src\t# D-round" %} opcode(0xD9); /* D9 /2 */ ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg );%}// Store immediate Float value (it is faster than store from FPU register)// The instruction usage is guarded by predicate in operand immFPR().instruct storeFPR_imm( memory mem, immFPR src) %{ match(Set mem (StoreF mem src)); ins_cost(50); format %{ "MOV $mem,$src\t# store float" %} opcode(0xC7); /* C7 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); ins_pipe( ialu_mem_imm );%}// Store immediate Float value (it is faster than store from XMM register)// The instruction usage is guarded by predicate in operand immF().instruct storeF_imm( memory mem, immF src) %{ match(Set mem (StoreF mem src)); ins_cost(50); format %{ "MOV $mem,$src\t# store float" %} opcode(0xC7); /* C7 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); ins_pipe( ialu_mem_imm );%}// Store Integer to stack slotinstruct storeSSI(stackSlotI dst, rRegI src) %{ match(Set dst src); ins_cost(100); format %{ "MOV $dst,$src" %} opcode(0x89); ins_encode( OpcPRegSS( dst, src ) ); ins_pipe( ialu_mem_reg );%}// Store Integer to stack slotinstruct storeSSP(stackSlotP dst, eRegP src) %{ match(Set dst src); ins_cost(100); format %{ "MOV $dst,$src" %} opcode(0x89); ins_encode( OpcPRegSS( dst, src ) ); ins_pipe( ialu_mem_reg );%}// Store Long to stack slotinstruct storeSSL(stackSlotL dst, eRegL src) %{ match(Set dst src); ins_cost(200); format %{ "MOV $dst,$src.lo\n\t" "MOV $dst+4,$src.hi" %} opcode(0x89, 0x89); ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); ins_pipe( ialu_mem_long_reg );%}//----------MemBar Instructions-----------------------------------------------// Memory barrier flavorsinstruct membar_acquire() %{ match(MemBarAcquire); ins_cost(400); size(0); format %{ "MEMBAR-acquire ! 
(empty encoding)" %} ins_encode(); ins_pipe(empty);%}instruct membar_acquire_lock() %{ match(MemBarAcquireLock); ins_cost(0); size(0); format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} ins_encode( ); ins_pipe(empty);%}instruct membar_release() %{ match(MemBarRelease); ins_cost(400); size(0); format %{ "MEMBAR-release ! (empty encoding)" %} ins_encode( ); ins_pipe(empty);%}instruct membar_release_lock() %{ match(MemBarReleaseLock); ins_cost(0); size(0); format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} ins_encode( ); ins_pipe(empty);%}instruct membar_volatile(eFlagsReg cr) %{ match(MemBarVolatile); effect(KILL cr); ins_cost(400); format %{ $$template if (os::is_MP()) { $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" } else { $$emit$$"MEMBAR-volatile ! (empty encoding)" } %} ins_encode %{ __ membar(Assembler::StoreLoad); %} ins_pipe(pipe_slow);%}instruct unnecessary_membar_volatile() %{ match(MemBarVolatile); predicate(Matcher::post_store_load_barrier(n)); ins_cost(0); size(0); format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} ins_encode( ); ins_pipe(empty);%}instruct membar_storestore() %{ match(MemBarStoreStore); ins_cost(0); size(0); format %{ "MEMBAR-storestore (empty encoding)" %} ins_encode( ); ins_pipe(empty);%}//----------Move Instructions--------------------------------------------------instruct castX2P(eAXRegP dst, eAXRegI src) %{ match(Set dst (CastX2P src)); format %{ "# X2P $dst, $src" %} ins_encode( /*empty encoding*/ ); ins_cost(0); ins_pipe(empty);%}instruct castP2X(rRegI dst, eRegP src ) %{ match(Set dst (CastP2X src)); ins_cost(50); format %{ "MOV $dst, $src\t# CastP2X" %} ins_encode( enc_Copy( dst, src) ); ins_pipe( ialu_reg_reg );%}//----------Conditional Move---------------------------------------------------// Conditional moveinstruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary 
dst src))); ins_cost(200); format %{ "J$cop,us skip\t# signed cmove\n\t" "MOV $dst,$src\n" "skip:" %} ins_encode %{ Label Lskip; // Invert sense of branch from sense of CMOV __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); __ movl($dst$$Register, $src$$Register); __ bind(Lskip); %} ins_pipe( pipe_cmov_reg );%}instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "J$cop,us skip\t# unsigned cmove\n\t" "MOV $dst,$src\n" "skip:" %} ins_encode %{ Label Lskip; // Invert sense of branch from sense of CMOV __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); __ movl($dst$$Register, $src$$Register); __ bind(Lskip); %} ins_pipe( pipe_cmov_reg );%}instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "CMOV$cop $dst,$src" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegReg( dst, src ) ); ins_pipe( pipe_cmov_reg );%}instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "CMOV$cop $dst,$src" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegReg( dst, src ) ); ins_pipe( pipe_cmov_reg );%}instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); expand %{ cmovI_regU(cop, cr, dst, src); %}%}// Conditional moveinstruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); format %{ "CMOV$cop $dst,$src" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegMem( dst, src ) ); ins_pipe( 
pipe_cmov_mem );%}// Conditional moveinstruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); format %{ "CMOV$cop $dst,$src" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegMem( dst, src ) ); ins_pipe( pipe_cmov_mem );%}instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); expand %{ cmovI_memU(cop, cr, dst, src); %}%}// Conditional moveinstruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "CMOV$cop $dst,$src\t# ptr" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegReg( dst, src ) ); ins_pipe( pipe_cmov_reg );%}// Conditional move (non-P6 version)// Note: a CMoveP is generated for stubs and native wrappers// regardless of whether we are on a P6, so we// emulate a cmov hereinstruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ins_cost(300); format %{ "Jn$cop skip\n\t" "MOV $dst,$src\t# pointer\n" "skip:" %} opcode(0x8b); ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); ins_pipe( pipe_cmov_reg );%}// Conditional moveinstruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "CMOV$cop $dst,$src\t# ptr" %} opcode(0x0F,0x40); ins_encode( enc_cmov(cop), RegReg( dst, src ) ); ins_pipe( pipe_cmov_reg );%}instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ins_cost(200); expand %{ cmovP_regU(cop, 
cr, dst, src); %}%}// DISABLED: Requires the ADLC to emit a bottom_type call that// correctly meets the two pointer arguments; one is an incoming// register but the other is a memory operand. ALSO appears to// be buggy with implicit null checks.////// Conditional move//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{// predicate(VM_Version::supports_cmov() );// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));// ins_cost(250);// format %{ "CMOV$cop $dst,$src\t# ptr" %}// opcode(0x0F,0x40);// ins_encode( enc_cmov(cop), RegMem( dst, src ) );// ins_pipe( pipe_cmov_mem );//%}////// Conditional move//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{// predicate(VM_Version::supports_cmov() );// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));// ins_cost(250);// format %{ "CMOV$cop $dst,$src\t# ptr" %}// opcode(0x0F,0x40);// ins_encode( enc_cmov(cop), RegMem( dst, src ) );// ins_pipe( pipe_cmov_mem );//%}// Conditional moveinstruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "FCMOV$cop $dst,$src\t# double" %} opcode(0xDA); ins_encode( enc_cmov_dpr(cop,src) ); ins_pipe( pipe_cmovDPR_reg );%}// Conditional moveinstruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "FCMOV$cop $dst,$src\t# float" %} opcode(0xDA); ins_encode( enc_cmov_dpr(cop,src) ); ins_pipe( pipe_cmovDPR_reg );%}// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "Jn$cop skip\n\t" "MOV $dst,$src\t# double\n" "skip:" %} opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ ins_encode( 
enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( pipe_cmovDPR_reg );%}// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "Jn$cop skip\n\t" "MOV $dst,$src\t# float\n" "skip:" %} opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( pipe_cmovDPR_reg );%}// No CMOVE with SSE/SSE2instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ predicate (UseSSE>=1); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "Jn$cop skip\n\t" "MOVSS $dst,$src\t# float\n" "skip:" %} ins_encode %{ Label skip; // Invert sense of branch from sense of CMOV __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); __ movflt($dst$$XMMRegister, $src$$XMMRegister); __ bind(skip); %} ins_pipe( pipe_slow );%}// No CMOVE with SSE/SSE2instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ predicate (UseSSE>=2); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "Jn$cop skip\n\t" "MOVSD $dst,$src\t# float\n" "skip:" %} ins_encode %{ Label skip; // Invert sense of branch from sense of CMOV __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); __ movdbl($dst$$XMMRegister, $src$$XMMRegister); __ bind(skip); %} ins_pipe( pipe_slow );%}// unsigned versioninstruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ predicate (UseSSE>=1); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "Jn$cop skip\n\t" "MOVSS $dst,$src\t# float\n" "skip:" %} ins_encode %{ Label skip; // Invert sense of branch from sense of CMOV __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); __ movflt($dst$$XMMRegister, $src$$XMMRegister); __ bind(skip); %} ins_pipe( pipe_slow );%}instruct 
fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // This rule matches CMoveD and moves with movdbl, so the disassembly
  // annotation reads "double".
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half of the register pair.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic
// Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);
  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);
  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// LEA forms: three-operand add, declared without a flags effect.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);
  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);
  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  format %{ "ADD $dst,$src" %}
opcode(0x81,0x00); /* Opcode 81 /0 id */ // ins_encode( RegImm( dst, src) ); ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); ins_pipe( ialu_reg );%}instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (AddI dst (LoadI src))); effect(KILL cr); ins_cost(125); format %{ "ADD $dst,$src" %} opcode(0x03); ins_encode( OpcP, RegMem( dst, src) ); ins_pipe( ialu_reg_mem );%}instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); ins_cost(150); format %{ "ADD $dst,$src" %} opcode(0x01); /* Opcode 01 /r */ ins_encode( OpcP, RegMem( src, dst ) ); ins_pipe( ialu_mem_reg );%}// Add Memory with Immediateinstruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); ins_cost(125); format %{ "ADD $dst,$src" %} opcode(0x81); /* Opcode 81 /0 id */ ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); ins_pipe( ialu_mem_imm );%}instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); ins_cost(125); format %{ "INC $dst" %} opcode(0xFF); /* Opcode FF /0 */ ins_encode( OpcP, RMopc_Mem(0x00,dst)); ins_pipe( ialu_mem_imm );%}instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); ins_cost(125); format %{ "DEC $dst" %} opcode(0xFF); /* Opcode FF /1 */ ins_encode( OpcP, RMopc_Mem(0x01,dst)); ins_pipe( ialu_mem_imm );%}instruct checkCastPP( eRegP dst ) %{ match(Set dst (CheckCastPP dst)); size(0); format %{ "#checkcastPP of $dst" %} ins_encode( /*empty encoding*/ ); ins_pipe( empty );%}instruct castPP( eRegP dst ) %{ match(Set dst (CastPP dst)); format %{ "#castPP of $dst" %} ins_encode( /*empty encoding*/ ); ins_pipe( empty );%}instruct castII( rRegI dst ) %{ match(Set dst (CastII dst)); format %{ "#castII of $dst" %} ins_encode( /*empty encoding*/ ); ins_cost(0); 
ins_pipe( empty );%}// Load-locked - same as a regular pointer load when used with compare-swapinstruct loadPLocked(eRegP dst, memory mem) %{ match(Set dst (LoadPLocked mem)); ins_cost(125); format %{ "MOV $dst,$mem\t# Load ptr. locked" %} opcode(0x8B); ins_encode( OpcP, RegMem(dst,mem)); ins_pipe( ialu_reg_mem );%}// Conditional-store of the updated heap-top.// Used during allocation of the shared heap.// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); // EAX is killed if there is contention, but then it's also unused. // In the common case of no contention, EAX holds the new oop address. format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); ins_pipe( pipe_cmpxchg );%}// Conditional-store of an int value.// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ match(Set cr (StoreIConditional mem (Binary oldval newval))); effect(KILL oldval); format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); ins_pipe( pipe_cmpxchg );%}// Conditional-store of a long value.// ZF flag is set on success, reset otherwise. 
// Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX" %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
      "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
      "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL
cr, KILL oldval); format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" "MOV $res,0\n\t" "JNE,s fail\n\t" "MOV $res,1\n" "fail:" %} ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); ins_pipe( pipe_cmpxchg );%}instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddI mem add)); effect(KILL cr); format %{ "ADDL [$mem],$add" %} ins_encode %{ if (os::is_MP()) { __ lock(); } __ addl($mem$$Address, $add$$constant); %} ins_pipe( pipe_cmpxchg );%}instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ match(Set newval (GetAndAddI mem newval)); effect(KILL cr); format %{ "XADDL [$mem],$newval" %} ins_encode %{ if (os::is_MP()) { __ lock(); } __ xaddl($mem$$Address, $newval$$Register); %} ins_pipe( pipe_cmpxchg );%}instruct xchgI( memory mem, rRegI newval) %{ match(Set newval (GetAndSetI mem newval)); format %{ "XCHGL $newval,[$mem]" %} ins_encode %{ __ xchgl($newval$$Register, $mem$$Address); %} ins_pipe( pipe_cmpxchg );%}instruct xchgP( memory mem, pRegP newval) %{ match(Set newval (GetAndSetP mem newval)); format %{ "XCHGL $newval,[$mem]" %} ins_encode %{ __ xchgl($newval$$Register, $mem$$Address); %} ins_pipe( pipe_cmpxchg );%}//----------Subtraction Instructions-------------------------------------------// Integer Subtraction Instructionsinstruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); size(2); format %{ "SUB $dst,$src" %} opcode(0x2B); ins_encode( OpcP, RegReg( dst, src) ); ins_pipe( ialu_reg_reg );%}instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); format %{ "SUB $dst,$src" %} opcode(0x81,0x05); /* Opcode 81 /5 */ // ins_encode( RegImm( dst, src) ); ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); ins_pipe( ialu_reg );%}instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set 
dst (SubI dst (LoadI src))); effect(KILL cr); ins_cost(125); format %{ "SUB $dst,$src" %} opcode(0x2B); ins_encode( OpcP, RegMem( dst, src) ); ins_pipe( ialu_reg_mem );%}instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (SubI (LoadI dst) src))); effect(KILL cr); ins_cost(150); format %{ "SUB $dst,$src" %} opcode(0x29); /* Opcode 29 /r */ ins_encode( OpcP, RegMem( src, dst ) ); ins_pipe( ialu_mem_reg );%}// Subtract from a pointerinstruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ match(Set dst (AddP dst (SubI zero src))); effect(KILL cr); size(2); format %{ "SUB $dst,$src" %} opcode(0x2B); ins_encode( OpcP, RegReg( dst, src) ); ins_pipe( ialu_reg_reg );%}instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ match(Set dst (SubI zero dst)); effect(KILL cr); size(2); format %{ "NEG $dst" %} opcode(0xF7,0x03); // Opcode F7 /3 ins_encode( OpcP, RegOpc( dst ) ); ins_pipe( ialu_reg );%}//----------Multiplication/Division Instructions-------------------------------// Integer Multiplication Instructions// Multiply Registerinstruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (MulI dst src)); effect(KILL cr); size(3); ins_cost(300); format %{ "IMUL $dst,$src" %} opcode(0xAF, 0x0F); ins_encode( OpcS, OpcP, RegReg( dst, src) ); ins_pipe( ialu_reg_reg_alu0 );%}// Multiply 32-bit Immediateinstruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI src imm)); effect(KILL cr); ins_cost(300); format %{ "IMUL $dst,$src,$imm" %} opcode(0x69); /* 69 /r id */ ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); ins_pipe( ialu_reg_reg_alu0 );%}instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); // Note that this is artificially increased to make it more expensive than loadConL ins_cost(250); format %{ "MOV EAX,$src\t// low word only" %} opcode(0xB8); ins_encode( LdImmL_Lo(dst, src) ); ins_pipe( 
ialu_reg_fat );%}// Multiply by 32-bit Immediate, taking the shifted high order results// (special case for shift by 32)instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); effect(USE src1, KILL cr); // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only ins_cost(0*100 + 1*400 - 150); format %{ "IMUL EDX:EAX,$src1" %} ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); ins_pipe( pipe_slow );%}// Multiply by 32-bit Immediate, taking the shifted high order resultsinstruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); effect(USE src1, KILL cr); // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only ins_cost(1*100 + 1*400 - 150); format %{ "IMUL EDX:EAX,$src1\n\t" "SAR EDX,$cnt-32" %} ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); ins_pipe( pipe_slow );%}// Multiply Memory 32-bit Immediateinstruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI (LoadI src) imm)); effect(KILL cr); ins_cost(300); format %{ "IMUL $dst,$src,$imm" %} opcode(0x69); /* 69 /r id */ ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); ins_pipe( ialu_reg_mem_alu0 );%}// Multiply Memoryinstruct 
mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);
  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}
  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);
  ins_cost(300);
  format %{ "MUL $dst,$src1" %}
  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI
rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // The format shows a guard for EAX == 0x80000000 with divisor -1 that
  // skips the IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);
  size(26);
  ins_cost(300);
  // NOTE(review): encoding and size(26) are identical to divI_eReg, whose
  // format lists the full guarded sequence; this format looks abbreviated.
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe(
pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by the magnitude; the sign is applied after the done label.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend. 
// convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Negative divisor: negate the 64-bit quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Only the magnitude of the divisor is used below.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend. 
// convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);
  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one (memory operand: load, shift, store back)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);
  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): the sibling register/imm8 shifts salI_eReg_imm and
  // shrI_eReg_imm use ialu_reg here; confirm ialu_mem_imm is intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate (memory operand)
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);
  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The shift pair is matched as one node and emitted as a single movsbl.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by
// the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);
  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);
  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);
  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI
(LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);
  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer register cast to an integer (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);
  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These three have no match rule; they are used only from the expand
// blocks of the rolI_eReg_* match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  // Only a rotate when the two shift counts sum to a multiple of 32.
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg
);
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);
  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);
  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  // Only a rotate when the two shift counts sum to a multiple of 32.
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);
  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// Emitted as NOT; this rule declares no flags operand or KILL effect.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));
  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct
xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Helper for the convI2B expand: register copy, no match rule.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper for the convI2B expand: NEG/ADC pair, no match rule.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst), OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: expands to movI_nocopy followed by ci2b.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));
  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Helper for the convP2B expand: register copy from a pointer register.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI
dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst), OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer: expands to movP_nocopy followed by cp2b.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));
  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask p q: XOR/CMP/SETlt/NEG sequence into dst.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // The sequence is branch-free, so no Label is declared here.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}
  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: a single arithmetic shift right by 31.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + ((p < q) ? y : 0), done with a subtract and a conditional add.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0, done with a compare and a conditional clear.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);
  ins_cost(300);
  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __
cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);
/* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long Register (matched as 0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi(
dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// Emitted as NOT on each half; this rule declares no flags operand.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Doubles the value with an ADD/ADC pair; only used when UseNewLongLShift is set.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two ADD/ADC doublings.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC doublings.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode(
move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right arithmetic Long by 1-31
instruct
sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of double compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05);  /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare producing eFlagsRegUCF; the encoding omits cmpF_P6_fixup.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 
instruction" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2)); ins_pipe( pipe_slow );%}// Compare & branchinstruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ predicate(UseSSE<=1); match(Set cr (CmpD src1 src2)); effect(KILL rax); ins_cost(200); format %{ "FLD $src1\n\t" "FCOMp $src2\n\t" "FNSTSW AX\n\t" "TEST AX,0x400\n\t" "JZ,s flags\n\t" "MOV AH,1\t# unordered treat as LT\n" "flags:\tSAHF" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), fpu_flags); ins_pipe( pipe_slow );%}// Compare vs zero into -1,0,1instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 zero)); effect(KILL cr, KILL rax); ins_cost(280); format %{ "FTSTD $dst,$src1" %} opcode(0xE4, 0xD9); ins_encode( Push_Reg_DPR(src1), OpcS, OpcP, PopFPU, CmpF_Result(dst)); ins_pipe( pipe_slow );%}// Compare into -1,0,1instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 src2)); effect(KILL cr, KILL rax); ins_cost(300); format %{ "FCMPD $dst,$src1,$src2" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), CmpF_Result(dst)); ins_pipe( pipe_slow );%}// float compare and set condition codes in EFLAGS by XMM regsinstruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ predicate(UseSSE>=2); match(Set cr (CmpD src1 src2)); ins_cost(145); format %{ "UCOMISD $src1,$src2\n\t" "JNP,s exit\n\t" "PUSHF\t# saw NaN, set CF\n\t" "AND [rsp], #0xffffff2b\n\t" "POPF\n" "exit:" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); emit_cmpfp_fixup(_masm); %} ins_pipe( pipe_slow );%}instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ predicate(UseSSE>=2); match(Set cr (CmpD src1 src2)); ins_cost(100); format %{ "UCOMISD $src1,$src2" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, 
$src2$$XMMRegister); %} ins_pipe( pipe_slow );%}// float compare and set condition codes in EFLAGS by XMM regsinstruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ predicate(UseSSE>=2); match(Set cr (CmpD src1 (LoadD src2))); ins_cost(145); format %{ "UCOMISD $src1,$src2\n\t" "JNP,s exit\n\t" "PUSHF\t# saw NaN, set CF\n\t" "AND [rsp], #0xffffff2b\n\t" "POPF\n" "exit:" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, $src2$$Address); emit_cmpfp_fixup(_masm); %} ins_pipe( pipe_slow );%}instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ predicate(UseSSE>=2); match(Set cr (CmpD src1 (LoadD src2))); ins_cost(100); format %{ "UCOMISD $src1,$src2" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, $src2$$Address); %} ins_pipe( pipe_slow );%}// Compare into -1,0,1 in XMMinstruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ predicate(UseSSE>=2); match(Set dst (CmpD3 src1 src2)); effect(KILL cr); ins_cost(255); format %{ "UCOMISD $src1, $src2\n\t" "MOV $dst, #-1\n\t" "JP,s done\n\t" "JB,s done\n\t" "SETNE $dst\n\t" "MOVZB $dst, $dst\n" "done:" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); emit_cmpfp3(_masm, $dst$$Register); %} ins_pipe( pipe_slow );%}// Compare into -1,0,1 in XMM and memoryinstruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ predicate(UseSSE>=2); match(Set dst (CmpD3 src1 (LoadD src2))); effect(KILL cr); ins_cost(275); format %{ "UCOMISD $src1, $src2\n\t" "MOV $dst, #-1\n\t" "JP,s done\n\t" "JB,s done\n\t" "SETNE $dst\n\t" "MOVZB $dst, $dst\n" "done:" %} ins_encode %{ __ ucomisd($src1$$XMMRegister, $src2$$Address); emit_cmpfp3(_masm, $dst$$Register); %} ins_pipe( pipe_slow );%}instruct subDPR_reg(regDPR dst, regDPR src) %{ predicate (UseSSE <=1); match(Set dst (SubD dst src)); format %{ "FLD $src\n\t" "DSUBp $dst,ST" %} opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ ins_cost(150); ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg );%}instruct 
subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ predicate (UseSSE <=1); match(Set dst (RoundDouble (SubD src1 src2))); ins_cost(250); format %{ "FLD $src2\n\t" "DSUB ST,$src1\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xD8, 0x5); ins_encode( Push_Reg_DPR(src2), OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); ins_pipe( fpu_mem_reg_reg );%}instruct subDPR_reg_mem(regDPR dst, memory src) %{ predicate (UseSSE <=1); match(Set dst (SubD dst (LoadD src))); ins_cost(150); format %{ "FLD $src\n\t" "DSUBp $dst,ST" %} opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_mem );%}instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst (AbsD src)); ins_cost(100); format %{ "FABS" %} opcode(0xE1, 0xD9); ins_encode( OpcS, OpcP ); ins_pipe( fpu_reg_reg );%}instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate(UseSSE<=1); match(Set dst (NegD src)); ins_cost(100); format %{ "FCHS" %} opcode(0xE0, 0xD9); ins_encode( OpcS, OpcP ); ins_pipe( fpu_reg_reg );%}instruct addDPR_reg(regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (AddD dst src)); format %{ "FLD $src\n\t" "DADD $dst,ST" %} size(4); ins_cost(150); opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg );%}instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ predicate(UseSSE<=1); match(Set dst (RoundDouble (AddD src1 src2))); ins_cost(250); format %{ "FLD $src2\n\t" "DADD ST,$src1\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ ins_encode( Push_Reg_DPR(src2), OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); ins_pipe( fpu_mem_reg_reg );%}instruct addDPR_reg_mem(regDPR dst, memory src) %{ predicate(UseSSE<=1); match(Set dst (AddD dst (LoadD src))); ins_cost(150); format %{ "FLD $src\n\t" "DADDp $dst,ST" %} opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ ins_encode( Opcode(tertiary), 
RMopc_Mem(0x00,src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_mem );%}// add-to-memoryinstruct addDPR_mem_reg(memory dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); ins_cost(150); format %{ "FLD_D $dst\n\t" "DADD ST,$src\n\t" "FST_D $dst" %} opcode(0xDD, 0x0); ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), Opcode(0xD8), RegOpc(src), set_instruction_start, Opcode(0xDD), RMopc_Mem(0x03,dst) ); ins_pipe( fpu_reg_mem );%}instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ predicate(UseSSE<=1); match(Set dst (AddD dst con)); ins_cost(125); format %{ "FLD1\n\t" "DADDp $dst,ST" %} ins_encode %{ __ fld1(); __ faddp($dst$$reg); %} ins_pipe(fpu_reg);%}instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); match(Set dst (AddD dst con)); ins_cost(200); format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" "DADDp $dst,ST" %} ins_encode %{ __ fld_d($constantaddress($con)); __ faddp($dst$$reg); %} ins_pipe(fpu_reg_mem);%}instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); match(Set dst (RoundDouble (AddD src con))); ins_cost(200); format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" "DADD ST,$src\n\t" "FSTP_D $dst\t# D-round" %} ins_encode %{ __ fld_d($constantaddress($con)); __ fadd($src$$reg); __ fstp_d(Address(rsp, $dst$$disp)); %} ins_pipe(fpu_mem_reg_con);%}instruct mulDPR_reg(regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (MulD dst src)); format %{ "FLD $src\n\t" "DMULp $dst,ST" %} opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ ins_cost(150); ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg );%}// Strict FP instruction biases argument before multiply then// biases result to avoid double rounding of 
// subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant loaded from the constant table; the
// predicate excludes the constants 0.0 and 1.0.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Multiply by a double loaded from memory (tertiary opcode DD /0 = FLD m64).
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
//
// The memory operand is a double (LoadD), so the load must be the 64-bit
// form DD /0 (FLD m64fp), exactly as in mulDPR_reg_mem and as the format
// string (FLD_D) states.  The tertiary opcode used to be 0xD9, which with
// /0 encodes the 32-bit FLD m32fp and would read the operand as a float.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide, two-address form: matches (DivD dst src)
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// A single predicate is declared, in the same shape as strictfp_mulDPR_reg:
// the rule applies only to strict-FP methods.  An additional, weaker
// "predicate (UseSSE<=1)" clause used to precede the match rule; it was
// redundant with the clause below and has been removed.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // was written as the octal literal 01; same value

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Rounded double divide into a stack slot; the predicate excludes
// strict-FP methods.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double remainder
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for XMM operands; the printed sequence moves both
// operands through the stack onto the x87 stack and loops on FPREM.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack" %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// Sine, x87 operand (D9 FE)
instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// Sine, XMM operand moved through the x87 stack
instruct sinD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Cosine, x87 operand (D9 FF)
instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// Cosine, XMM operand moved through the x87 stack
instruct cosD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Tangent, x87 operand
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

// Tangent, XMM operand moved through the x87 stack
instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),    // fstp st
              Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Arc tangent, x87 operands
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Arc tangent, XMM operands
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Square root, x87 operands
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

// Power, x87 operands; reserves 8 bytes of stack around fast_pow()
instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y)); // Raise X to the Yth power
  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
  format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ subptr(rsp, 8);
    __ fld_s($X$$reg - 1);
    __ fast_pow();
    __ addptr(rsp, 8);
  %}
  ins_pipe( pipe_slow );
%}

// Power, XMM operands; both inputs are spilled to the stack and loaded
// onto the x87 stack, and the result travels back the same way.
instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
  format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fast_pow();
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
  ins_pipe( pipe_slow );
%}

// Exponential, x87 operand (in place)
instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
  format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ fast_exp();
  %}
  ins_pipe( pipe_slow );
%}

// Exponential, XMM operands.  The encoding loads $src and stores the
// result to $dst, so the format string is printed as "$src -> $dst"
// (it previously had the two operands swapped).
instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
  format %{ "fast_exp $src -> $dst // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fast_exp();
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
  ins_pipe( pipe_slow );
%}

// Base-10 logarithm, x87 operand
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)" %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x
  ins_pipe( pipe_slow );
%}

// Base-10 logarithm, XMM operands
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)" %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Natural logarithm, x87 operand
instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)" %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x
  ins_pipe( pipe_slow );
%}

// Natural logarithm, XMM operands
instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)" %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
// fcompp();
// fwait(); fnstsw_ax();
// sahf();
// movl(dst, unordered_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);
// exit:

// P6 version of float compare, sets condition codes in
// EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare targeting eFlagsRegUCF; no NaN fixup sequence is emitted.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// XMM register compare targeting eFlagsRegUCF; only UCOMISS is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (second operand loaded from memory)
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand compare targeting eFlagsRegUCF; only UCOMISS is emitted.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and
// memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 float absolute value; both operands are constrained to regFPR1
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 float negate; both operands are constrained to regFPR1
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does
// not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  // Selected only when UseSSE==0 and 24-bit instructions are not requested.
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // src2 = (src0 * src1) + src2; src2 is both an input and the result.
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9);  /* LoadF D9 /0 */
  ins_encode(Push_Reg_FPR(src0),
             FMul_ST_reg(src1),
             FAddP_reg_ST(src2));
  ins_pipe(fpu_reg_reg_reg);
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // dst = (src2 - src1) / src3, fused into one FPU sequence.
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7);  /* DE F8+i or DE /7*/
  ins_encode(Push_Reg_FPR(src2),
             subFPR_divFPR_encode(src1,src3),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_reg_reg_reg);
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  // 24-bit variant: the result is popped to a float stack slot (dst).
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6);  /* D8 F0+i or DE /6*/
  ins_encode(Push_Reg_FPR(src1),
             OpcReg_FPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}

//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // Two-address form: dst = dst / src.
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7);  /* DE F8+i or DE /7*/
  ins_encode(Push_Reg_FPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr);  // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode(Push_Reg_Mod_DPR(src1, src2),
             emitModDPR(),
             Push_Result_Mod_DPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(pipe_slow);
%}

//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // Two-address form: dst = dst % src.
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr);  // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe(pipe_slow);
%}

// Float remainder for XMM operands (UseSSE>=1).  Per the format text, both
// operands are bounced through the stack onto the FPU stack, FPREM is looped
// until it completes, and the result is moved back into an XMM register.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack" %}
  ins_cost(250);
  ins_encode(Push_ModF_encoding(src0, src1),
             emitModDPR(),
             Push_ResultF(dst,0x4),
             PopFPU);
  ins_pipe(pipe_slow);
%}

//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.
// Please keep it that way!

// Store an FPU float register to a float stack slot (UseSSE==0 only).
// The format annotates the store as the float rounding step.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);

  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an FPU double register to a double stack slot (UseSSE<=1).
// The format annotates the store as the double rounding step.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);

  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to single precision (24-bit significand, 8-bit exponent).
// Expands to roundFloat_mem_reg, i.e. a store to a float stack slot.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));

  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to single precision (24-bit significand, 8-bit exponent).
// UseSSE==1: the double lives on the FPU stack and the float result in XMM,
// so the value is stored as a float to a 4-byte scratch slot at [ESP] and
// reloaded with MOVSS.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );

  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // src is not FPR1L: push a copy, then store-and-pop it.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // src is FPR1L: store it without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));

  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double between FPU registers (UseSSE==0).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));

  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float (FPU register) -> double stack slot (UseSSE==1).
// Expands to roundDouble_mem_reg.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));

  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// Float in XMM -> double on the FPU stack (UseSSE==1).  The float is spilled
// to a 4-byte scratch slot at [ESP], loaded with FLD_S, and popped into dst.
instruct
convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );

  // Operand separator added after [ESP] so the listing matches the other
  // MOVSS format lines in this file.
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double entirely in XMM registers (UseSSE>=2).
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));

  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// Per the format text: FISTP in truncating mode, then fall back to
// d2i_wrapper when the stored result is 0x80000000.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );

  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.
If the double is a NAN, stuff a zero in instead.instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ predicate(UseSSE>=2); match(Set dst (ConvD2I src)); effect( KILL tmp, KILL cr ); format %{ "CVTTSD2SI $dst, $src\n\t" "CMP $dst,0x80000000\n\t" "JNE,s fast\n\t" "SUB ESP, 8\n\t" "MOVSD [ESP], $src\n\t" "FLD_D [ESP]\n\t" "ADD ESP, 8\n\t" "CALL d2i_wrapper\n" "fast:" %} ins_encode %{ Label fast; __ cvttsd2sil($dst$$Register, $src$$XMMRegister); __ cmpl($dst$$Register, 0x80000000); __ jccb(Assembler::notEqual, fast); __ subptr(rsp, 8); __ movdbl(Address(rsp, 0), $src$$XMMRegister); __ fld_d(Address(rsp, 0)); __ addptr(rsp, 8); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); __ bind(fast); %} ins_pipe( pipe_slow );%}instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ predicate(UseSSE<=1); match(Set dst (ConvD2L src)); effect( KILL cr ); format %{ "FLD $src\t# Convert double to long\n\t" "FLDCW trunc mode\n\t" "SUB ESP,8\n\t" "FISTp [ESP + #0]\n\t" "FLDCW std/24-bit mode\n\t" "POP EAX\n\t" "POP EDX\n\t" "CMP EDX,0x80000000\n\t" "JNE,s fast\n\t" "TEST EAX,EAX\n\t" "JNE,s fast\n\t" "FLD $src\n\t" "CALL d2l_wrapper\n" "fast:" %} ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); ins_pipe( pipe_slow );%}// XMM lacks a float/double->long conversion, so use the old FPU stack.instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ predicate (UseSSE>=2); match(Set dst (ConvD2L src)); effect( KILL cr ); format %{ "SUB ESP,8\t# Convert double to long\n\t" "MOVSD [ESP],$src\n\t" "FLD_D [ESP]\n\t" "FLDCW trunc mode\n\t" "FISTp [ESP + #0]\n\t" "FLDCW std/24-bit mode\n\t" "POP EAX\n\t" "POP EDX\n\t" "CMP EDX,0x80000000\n\t" "JNE,s fast\n\t" "TEST EAX,EAX\n\t" "JNE,s fast\n\t" "SUB ESP,8\n\t" "MOVSD [ESP],$src\n\t" "FLD_D [ESP]\n\t" "ADD ESP,8\n\t" "CALL d2l_wrapper\n" "fast:" %} ins_encode %{ Label fast; __ subptr(rsp, 8); __ movdbl(Address(rsp, 0), $src$$XMMRegister); __ fld_d(Address(rsp, 
0)); __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); __ fistp_d(Address(rsp, 0)); // Restore the rounding mode, mask the exception if (Compile::current()->in_24_bit_fp_mode()) { __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); } else { __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } // Load the converted long, adjust CPU stack __ pop(rax); __ pop(rdx); __ cmpl(rdx, 0x80000000); __ jccb(Assembler::notEqual, fast); __ testl(rax, rax); __ jccb(Assembler::notEqual, fast); __ subptr(rsp, 8); __ movdbl(Address(rsp, 0), $src$$XMMRegister); __ fld_d(Address(rsp, 0)); __ addptr(rsp, 8); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); __ bind(fast); %} ins_pipe( pipe_slow );%}// Convert a double to an int. Java semantics require we do complex// manglations in the corner cases. So we set the rounding mode to// 'zero', store the darned double down as an int, and reset the// rounding mode to 'nearest'. The hardware stores a flag value down// if we would overflow or converted a NAN; we check for this and// and go the slow path if needed.instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ predicate(UseSSE==0); match(Set dst (ConvF2I src)); effect( KILL tmp, KILL cr ); format %{ "FLD $src\t# Convert float to int \n\t" "FLDCW trunc mode\n\t" "SUB ESP,4\n\t" "FISTp [ESP + #0]\n\t" "FLDCW std/24-bit mode\n\t" "POP EAX\n\t" "CMP EAX,0x80000000\n\t" "JNE,s fast\n\t" "FLD $src\n\t" "CALL d2i_wrapper\n" "fast:" %} // DPR2I_encoding works for FPR2I ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); ins_pipe( pipe_slow );%}// Convert a float in xmm to an int reg.instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ predicate(UseSSE>=1); match(Set dst (ConvF2I src)); effect( KILL tmp, KILL cr ); format %{ "CVTTSS2SI $dst, $src\n\t" "CMP $dst,0x80000000\n\t" "JNE,s fast\n\t" "SUB ESP, 4\n\t" "MOVSS [ESP], $src\n\t" "FLD [ESP]\n\t" "ADD ESP, 4\n\t" 
"CALL d2i_wrapper\n" "fast:" %} ins_encode %{ Label fast; __ cvttss2sil($dst$$Register, $src$$XMMRegister); __ cmpl($dst$$Register, 0x80000000); __ jccb(Assembler::notEqual, fast); __ subptr(rsp, 4); __ movflt(Address(rsp, 0), $src$$XMMRegister); __ fld_s(Address(rsp, 0)); __ addptr(rsp, 4); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); __ bind(fast); %} ins_pipe( pipe_slow );%}instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ predicate(UseSSE==0); match(Set dst (ConvF2L src)); effect( KILL cr ); format %{ "FLD $src\t# Convert float to long\n\t" "FLDCW trunc mode\n\t" "SUB ESP,8\n\t" "FISTp [ESP + #0]\n\t" "FLDCW std/24-bit mode\n\t" "POP EAX\n\t" "POP EDX\n\t" "CMP EDX,0x80000000\n\t" "JNE,s fast\n\t" "TEST EAX,EAX\n\t" "JNE,s fast\n\t" "FLD $src\n\t" "CALL d2l_wrapper\n" "fast:" %} // DPR2L_encoding works for FPR2L ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); ins_pipe( pipe_slow );%}// XMM lacks a float/double->long conversion, so use the old FPU stack.instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ predicate (UseSSE>=1); match(Set dst (ConvF2L src)); effect( KILL cr ); format %{ "SUB ESP,8\t# Convert float to long\n\t" "MOVSS [ESP],$src\n\t" "FLD_S [ESP]\n\t" "FLDCW trunc mode\n\t" "FISTp [ESP + #0]\n\t" "FLDCW std/24-bit mode\n\t" "POP EAX\n\t" "POP EDX\n\t" "CMP EDX,0x80000000\n\t" "JNE,s fast\n\t" "TEST EAX,EAX\n\t" "JNE,s fast\n\t" "SUB ESP,4\t# Convert float to long\n\t" "MOVSS [ESP],$src\n\t" "FLD_S [ESP]\n\t" "ADD ESP,4\n\t" "CALL d2l_wrapper\n" "fast:" %} ins_encode %{ Label fast; __ subptr(rsp, 8); __ movflt(Address(rsp, 0), $src$$XMMRegister); __ fld_s(Address(rsp, 0)); __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); __ fistp_d(Address(rsp, 0)); // Restore the rounding mode, mask the exception if (Compile::current()->in_24_bit_fp_mode()) { __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); } else { __ 
fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } // Load the converted long, adjust CPU stack __ pop(rax); __ pop(rdx); __ cmpl(rdx, 0x80000000); __ jccb(Assembler::notEqual, fast); __ testl(rax, rax); __ jccb(Assembler::notEqual, fast); __ subptr(rsp, 4); __ movflt(Address(rsp, 0), $src$$XMMRegister); __ fld_s(Address(rsp, 0)); __ addptr(rsp, 4); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); __ bind(fast); %} ins_pipe( pipe_slow );%}instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ predicate( UseSSE<=1 ); match(Set dst (ConvI2D src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); ins_pipe( fpu_reg_mem );%}instruct convI2D_reg(regD dst, rRegI src) %{ predicate( UseSSE>=2 && !UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "CVTSI2SD $dst,$src" %} ins_encode %{ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); %} ins_pipe( pipe_slow );%}instruct convI2D_mem(regD dst, memory mem) %{ predicate( UseSSE>=2 ); match(Set dst (ConvI2D (LoadI mem))); format %{ "CVTSI2SD $dst,$mem" %} ins_encode %{ __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow );%}instruct convXI2D_reg(regD dst, rRegI src)%{ predicate( UseSSE>=2 && UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "MOVD $dst,$src\n\t" "CVTDQ2PD $dst,$dst\t# i2d" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe(pipe_slow); // XXX%}instruct convI2DPR_mem(regDPR dst, memory mem) %{ predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2D (LoadI mem))); format %{ "FILD $mem\n\t" "FSTP $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Pop_Reg_DPR(dst)); ins_pipe( fpu_reg_mem );%}// Convert a byte to a float; no rounding step needed.instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ predicate( UseSSE==0 && n->in(1)->Opcode() == 
Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); match(Set dst (ConvI2F src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem );%}// In 24-bit mode, force exponent rounding by storing back outinstruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F src)); ins_cost(200); format %{ "FILD $src\n\t" "FSTP_S $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode( Push_Mem_I(src), Pop_Mem_FPR(dst)); ins_pipe( fpu_mem_mem );%}// In 24-bit mode, force exponent rounding by storing back outinstruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F (LoadI mem))); ins_cost(200); format %{ "FILD $mem\n\t" "FSTP_S $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Pop_Mem_FPR(dst)); ins_pipe( fpu_mem_mem );%}// This instruction does not round to 24-bitsinstruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode( Push_Mem_I(src), Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem );%}// This instruction does not round to 24-bitsinstruct convI2FPR_mem(regFPR dst, memory mem) %{ predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F (LoadI mem))); format %{ "FILD $mem\n\t" "FSTP $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem );%}// Convert an int to a float in xmm; no rounding step needed.instruct convI2F_reg(regF dst, rRegI src) %{ predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); match(Set dst (ConvI2F src)); format %{ "CVTSI2SS $dst, $src" %} ins_encode %{ __ cvtsi2ssl ($dst$$XMMRegister, 
$src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int to a float via MOVD + CVTDQ2PS (UseSSE>=2 with UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy src into both halves, then SAR the high
// half by 31 (per the format text).  Flags are killed.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);

  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);

  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);

  // The last format line carries no trailing "\n\t", matching
  // convI2L_reg_zex, so the listing has no dangling blank line.
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double with the result stored to a double stack slot (UseSSE<=1).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );

  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> double with the result in an XMM register (UseSSE>=2).
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );

  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long -> float with the result in an XMM register (UseSSE>=1).
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );

  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long -> float with the result stored to a float stack slot.
// Note: this rule carries no UseSSE predicate.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );

  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int: copy the low half of src.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );

  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw float bits -> int: load the float stack slot into a GPR.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);

  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Raw float bits -> int: store an FPU float register to an int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(125);

  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw float bits -> int: store an XMM float register to an int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(95);

  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct
MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(85); format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %} ins_pipe( pipe_slow );%}instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); ins_cost(100); format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} ins_encode %{ __ movl(Address(rsp, $dst$$disp), $src$$Register); %} ins_pipe( ialu_mem_reg );%}instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ predicate(UseSSE==0); match(Set dst (MoveI2F src)); effect(DEF dst, USE src); ins_cost(125); format %{ "FLD_S $src\n\t" "FSTP $dst\t# MoveI2F_stack_reg" %} opcode(0xD9); /* D9 /0, FLD m32real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem );%}instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ predicate(UseSSE>=1); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); ins_cost(95); format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} ins_encode %{ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); %} ins_pipe( pipe_slow );%}instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ predicate(UseSSE>=2); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); ins_cost(85); format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %} ins_pipe( pipe_slow );%}instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ match(Set dst (MoveD2L src)); effect(DEF dst, USE src); ins_cost(250); format %{ "MOV $dst.lo,$src\n\t" "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} opcode(0x8B, 0x8B); ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); ins_pipe( ialu_mem_long_reg );%}instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (MoveD2L src)); effect(DEF dst, USE src); ins_cost(125); format %{ "FST_D $dst,$src\t# 
MoveD2L_reg_stack" %} ins_encode( Pop_Mem_Reg_DPR(dst, src) ); ins_pipe( fpu_mem_reg );%}instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (MoveD2L src)); effect(DEF dst, USE src); ins_cost(95); format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} ins_encode %{ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); %} ins_pipe( pipe_slow );%}instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (MoveD2L src)); effect(DEF dst, USE src, TEMP tmp); ins_cost(85); format %{ "MOVD $dst.lo,$src\n\t" "PSHUFLW $tmp,$src,0x4E\n\t" "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(200); format %{ "MOV $dst,$src.lo\n\t" "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} opcode(0x89, 0x89); ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); ins_pipe( ialu_mem_long_reg );%}instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ predicate(UseSSE<=1); match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(125); format %{ "FLD_D $src\n\t" "FSTP $dst\t# MoveL2D_stack_reg" %} opcode(0xDD); /* DD /0, FLD m64real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem );%}instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(95); format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} ins_encode %{ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); %} ins_pipe( pipe_slow );%}instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 
match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(95); format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} ins_encode %{ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); %} ins_pipe( pipe_slow );%}instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (MoveL2D src)); effect(TEMP dst, USE src, TEMP tmp); ins_cost(85); format %{ "MOVD $dst,$src.lo\n\t" "MOVD $tmp,$src.hi\n\t" "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );%}// =======================================================================// fast clearing of an arrayinstruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ predicate(!UseFastStosb); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); format %{ "XOR EAX,EAX\t# ClearArray:\n\t" "SHL ECX,1\t# Convert doublewords to words\n\t" "REP STOS\t# store EAX into [EDI++] while ECX--" %} ins_encode %{ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); %} ins_pipe( pipe_slow );%}instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ predicate(UseFastStosb); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); format %{ "XOR EAX,EAX\t# ClearArray:\n\t" "SHL ECX,3\t# Convert doublewords to bytes\n\t" "REP STOSB\t# store EAX into [EDI++] while ECX--" %} ins_encode %{ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); %} ins_pipe( pipe_slow );%}instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, eFlagsReg cr) %{ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, 
KILL cr); format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} ins_encode %{ __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, $result$$Register, $tmp1$$XMMRegister); %} ins_pipe( pipe_slow );%}// fast string equalsinstruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} ins_encode %{ __ char_arrays_equals(false, $str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register, $tmp3$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow );%}// fast search of substring with known size.instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ predicate(UseSSE42Intrinsics); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} ins_encode %{ int icnt2 = (int)$int_cnt2$$constant; if (icnt2 >= 8) { // IndexOf for constant substrings with size >= 8 elements // which don't need to be loaded through stack. __ string_indexofC8($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, icnt2, $result$$Register, $vec$$XMMRegister, $tmp$$Register); } else { // Small strings are loaded through stack if they cross page boundary. 
__ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, icnt2, $result$$Register, $vec$$XMMRegister, $tmp$$Register); } %} ins_pipe( pipe_slow );%}instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ predicate(UseSSE42Intrinsics); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} ins_encode %{ __ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, (-1), $result$$Register, $vec$$XMMRegister, $tmp$$Register); %} ins_pipe( pipe_slow );%}// fast array equalsinstruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)%{ match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} ins_encode %{ __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, $tmp3$$Register, $result$$Register, $tmp4$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow );%}// encode char[] to byte[] in ISO_8859_1instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, 
$tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Register/register signed compare; defines the flags register only.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );

  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register/immediate signed compare; the encoding emits either an 8-bit or
// a 32-bit constant (OpcSErm + Con8or32).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );

  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against the constant zero, emitted as TEST src,src.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared with zero, emitted as TEST src,imm32.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) compared with zero, emitted as TEST src,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register/immediate unsigned compare.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against the constant zero, emitted as TEST src,src.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer register compared with a pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Only matches when the loaded pointer carries no relocation.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));
  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));
  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// NOTE(review): 'zero' is declared immI0 although the match is a CmpP
// (testP_reg above uses immP0) -- confirm whether this is intended.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));
  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// The limit lives in EAX with EDX as its high half (limit_hi); tmp holds the
// sign extension of init and later the stride magnitude used as the divisor.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      // Negate the 64-bit value so the division below uses a positive divisor.
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from
// Jcc+1
// Line structure restored: each definition and each '//' comment is on its
// own line so that comments no longer run into the code that follows them.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Same as jmpLoopEndU but for cmpOpUCF / eFlagsRegUCF operands (ins_cost 200).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Same as jmpConU but for cmpOpUCF / eFlagsRegUCF operands (ins_cost 200).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-branch form for cmpOpUCF2 (only equal / notEqual are accepted):
//   notEqual: branch to the label on parity, then on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit.
// The encoding ALSO sets flags.
// Line structure restored: each definition and each '//' comment is on its
// own line so that comments no longer run into the code that follows them.
// This variant materializes the result in EDI (opcode(0x1) forces the XOR).
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1);  // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when the check result is only compared against zero: the
// flags are the result, so EDI is merely killed (opcode(0x0) skips the XOR).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.
// Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
//
// Line structure restored: each definition and each '//' comment is on its
// own line so that comments no longer run into the code that follows them.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of the loop-end branch for cmpOpUCF / eFlagsRegUCF operands.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of the conditional branch for cmpOpUCF / eFlagsRegUCF operands.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short two-branch form for cmpOpUCF2 (only equal / notEqual are accepted):
//   notEqual: branch to the label on parity, then on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
// size(4) covers the two short branches emitted on either path.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.
// The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
//
// dst is cleared first, then decremented (src1 < src2), left at zero
// (src1 == src2) or incremented (src1 > src2). The high halves are compared
// signed (less/greater); the low halves are compared unsigned (below).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares.
// Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
//
// Line structure restored: each definition and each '//' comment is on its
// own line so that comments no longer run into the code that follows them.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovLL_reg_LTGE with the source long loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as cmovII_reg_LTGE with the source int loaded from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE: in the floating-point variants below the two BoolTest alternatives
// are parenthesized so that the UseSSE guard applies to both of them
// ('&&' binds tighter than '||').

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovLL_reg_EQNE with the source long loaded from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as cmovII_reg_EQNE with the source int loaded from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE: in the floating-point variants below the two BoolTest alternatives
// are parenthesized so that the UseSSE guard applies to both of them
// ('&&' binds tighter than '||').

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovLL_reg_LEGT with the source long loaded from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as cmovII_reg_LEGT with the source int loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE: in the regDPR / regD / regFPR variants below the two BoolTest
// alternatives are parenthesized so that the UseSSE guard applies to both of
// them ('&&' binds tighter than '||').

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT
flags, regF dst, regF src) %{ predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ fcmovF_regS(cmp,flags,dst,src); %}%}// ============================================================================// Procedure Call/Return Instructions// Call Java Static Instruction// Note: If this code changes, the corresponding ret_addr_offset() and// compute_padding() functions will have to be adjusted.instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke()); effect(USE meth); ins_cost(300); format %{ "CALL,static " %} opcode(0xE8); /* E8 cd */ ins_encode( pre_call_resets, Java_Static_Call( meth ), call_epilog, post_call_FPU ); ins_pipe( pipe_slow ); ins_alignment(4);%}// Call Java Static Instruction (method handle version)// Note: If this code changes, the corresponding ret_addr_offset() and// compute_padding() functions will have to be adjusted.instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{ match(CallStaticJava); predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); effect(USE meth); // EBP is saved by all callees (for interpreter stack correction). // We use it here for a similar purpose, in {preserve,restore}_SP. 
ins_cost(300); format %{ "CALL,static/MethodHandle " %} opcode(0xE8); /* E8 cd */ ins_encode( pre_call_resets, preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog, post_call_FPU ); ins_pipe( pipe_slow ); ins_alignment(4);%}// Call Java Dynamic Instruction// Note: If this code changes, the corresponding ret_addr_offset() and// compute_padding() functions will have to be adjusted.instruct CallDynamicJavaDirect(method meth) %{ match(CallDynamicJava); effect(USE meth); ins_cost(300); format %{ "MOV EAX,(oop)-1\n\t" "CALL,dynamic" %} opcode(0xE8); /* E8 cd */ ins_encode( pre_call_resets, Java_Dynamic_Call( meth ), call_epilog, post_call_FPU ); ins_pipe( pipe_slow ); ins_alignment(4);%}// Call Runtime Instructioninstruct CallRuntimeDirect(method meth) %{ match(CallRuntime ); effect(USE meth); ins_cost(300); format %{ "CALL,runtime " %} opcode(0xE8); /* E8 cd */ // Use FFREEs to clear entries in float stack ins_encode( pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime( meth ), post_call_FPU ); ins_pipe( pipe_slow );%}// Call runtime without safepointinstruct CallLeafDirect(method meth) %{ match(CallLeaf); effect(USE meth); ins_cost(300); format %{ "CALL_LEAF,runtime " %} opcode(0xE8); /* E8 cd */ ins_encode( pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime( meth ), Verify_FPU_For_Leaf, post_call_FPU ); ins_pipe( pipe_slow );%}instruct CallLeafNoFPDirect(method meth) %{ match(CallLeafNoFP); effect(USE meth); ins_cost(300); format %{ "CALL_LEAF_NOFP,runtime " %} opcode(0xE8); /* E8 cd */ ins_encode(Java_To_Runtime(meth)); ins_pipe( pipe_slow );%}// Return Instruction// Remove the return address & jump to it.instruct Ret() %{ match(Return); format %{ "RET" %} opcode(0xC3); ins_encode(OpcP); ins_pipe( pipe_jmp );%}// Tail Call; Jump from runtime stub to Java code.// Also known as an 'interprocedural jump'.// Target of jump will eventually return to caller.// TailJump below removes the return address.instruct TailCalljmpInd(eRegP_no_EBP jump_target, 
eBXRegP method_oop) %{ match(TailCall jump_target method_oop ); ins_cost(300); format %{ "JMP $jump_target \t# EBX holds method oop" %} opcode(0xFF, 0x4); /* Opcode FF /4 */ ins_encode( OpcP, RegOpc(jump_target) ); ins_pipe( pipe_jmp );%}// Tail Jump; remove the return address; jump to target.// TailCall above leaves the return address around.instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ match( TailJump jump_target ex_oop ); ins_cost(300); format %{ "POP EDX\t# pop return address into dummy\n\t" "JMP $jump_target " %} opcode(0xFF, 0x4); /* Opcode FF /4 */ ins_encode( enc_pop_rdx, OpcP, RegOpc(jump_target) ); ins_pipe( pipe_jmp );%}// Create exception oop: created by stack-crawling runtime code.// Created exception is now available to this handler, and is setup// just prior to jumping to this handler. No code emitted.instruct CreateException( eAXRegP ex_oop )%{ match(Set ex_oop (CreateEx)); size(0); // use the following format syntax format %{ "# exception oop is in EAX; no code emitted" %} ins_encode(); ins_pipe( empty );%}// Rethrow exception:// The exception oop will come in the first argument position.// Then JUMP (not call) to the rethrow stub code.instruct RethrowException()%{ match(Rethrow); // use the following format syntax format %{ "JMP rethrow_stub" %} ins_encode(enc_rethrow); ins_pipe( pipe_jmp );%}// inlined locking and unlockinginstruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ match( Set cr (FastLock object box) ); effect( TEMP tmp, TEMP scr, USE_KILL box ); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ins_encode( Fast_Lock(object,box,tmp,scr) ); ins_pipe( pipe_slow );%}instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ match( Set cr (FastUnlock object box) ); effect( TEMP tmp, USE_KILL box ); ins_cost(300); format %{ "FASTUNLOCK $object,$box\t! 
kills $box,$tmp" %} ins_encode( Fast_Unlock(object,box,tmp) ); ins_pipe( pipe_slow );%}// ============================================================================// Safepoint Instructioninstruct safePoint_poll(eFlagsReg cr) %{ match(SafePoint); effect(KILL cr); // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page. // On SPARC that might be acceptable as we can generate the address with // just a sethi, saving an or. By polling at offset 0 we can end up // putting additional pressure on the index-0 in the D$. Because of // alignment (just like the situation at hand) the lower indices tend // to see more traffic. It'd be better to change the polling address // to offset 0 of the last $line in the polling page. format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %} ins_cost(125); size(6) ; ins_encode( Safepoint_Poll() ); ins_pipe( ialu_reg_mem );%}// ============================================================================// This name is KNOWN by the ADLC and cannot be changed.// The ADLC forces a 'TypeRawPtr::BOTTOM' output type// for this guy.instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ match(Set dst (ThreadLocal)); effect(DEF dst, KILL cr); format %{ "MOV $dst, Thread::current()" %} ins_encode %{ Register dstReg = as_Register($dst$$reg); __ get_thread(dstReg); %} ins_pipe( ialu_reg_fat );%}//----------PEEPHOLE RULES-----------------------------------------------------// These must follow all instruction definitions as they use the names// defined in the instructions definitions.//// peepmatch ( root_instr_name [preceding_instruction]* );//// peepconstraint %{// (instruction_number.operand_name relational_op instruction_number.operand_name// [, ...] 
);// // instruction numbers are zero-based using left to right order in peepmatch//// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );// // provide an instruction_number.operand_name for each operand that appears// // in the replacement instruction's match rule//// ---------VM FLAGS---------------------------------------------------------//// All peephole optimizations can be turned off using -XX:-OptoPeephole//// Each peephole rule is given an identifying number starting with zero and// increasing by one in the order seen by the parser. An individual peephole// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#// on the command-line.//// ---------CURRENT LIMITATIONS----------------------------------------------//// Only match adjacent instructions in same basic block// Only equality constraints// Only constraints between operands, not (0.dest_reg == EAX_enc)// Only one replacement instruction//// ---------EXAMPLE----------------------------------------------------------//// // pertinent parts of existing instructions in architecture description// instruct movI(rRegI dst, rRegI src) %{// match(Set dst (CopyI src));// %}//// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{// match(Set dst (AddI dst src));// effect(KILL cr);// %}//// // Change (inc mov) to lea// peephole %{// // increment preceeded by register-register move// peepmatch ( incI_eReg movI );// // require that the destination register of the increment// // match the destination register of the move// peepconstraint ( 0.dst == 1.dst );// // construct a replacement instruction that sets// // the destination to ( move's source register + one )// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );// %}//// Implementation no longer uses movX instructions since// machine-independent system no longer uses CopyX nodes.//// peephole %{// peepmatch ( incI_eReg movI );// peepconstraint ( 0.dst == 1.dst );// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );// %}//// 
peephole %{// peepmatch ( decI_eReg movI );// peepconstraint ( 0.dst == 1.dst );// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );// %}//// peephole %{// peepmatch ( addI_eReg_imm movI );// peepconstraint ( 0.dst == 1.dst );// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );// %}//// peephole %{// peepmatch ( addP_eReg_imm movP );// peepconstraint ( 0.dst == 1.dst );// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );// %}// // Change load of spilled value to only a spill// instruct storeI(memory mem, rRegI src) %{// match(Set mem (StoreI mem src));// %}//// instruct loadI(rRegI dst, memory mem) %{// match(Set dst (LoadI mem));// %}//peephole %{ peepmatch ( loadI storeI ); peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); peepreplace ( storeI( 1.mem 1.mem 1.src ) );%}//----------SMARTSPILL RULES---------------------------------------------------// These must follow all instruction definitions as they use the names// defined in the instructions definitions.