# HG changeset patch # User neliasso # Date 1448357423 -3600 # Node ID d25c2fc1e248321a0df15cb7e6af241e943f5bf2 # Parent 6efbc7ffd767bfa24a1154c983269e92b53d69d5# Parent 2dafc56da2530ac0915bd5a7ace0a8e7e3cace6c Merge diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/aarch64.ad --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Tue Nov 24 10:30:23 2015 +0100 @@ -1079,10 +1079,10 @@ // and for a volatile write we need // // stlr - // + // // Alternatively, we can implement them by pairing a normal // load/store with a memory barrier. For a volatile read we need - // + // // ldr // dmb ishld // @@ -1240,7 +1240,7 @@ // Alternatively, we can elide generation of the dmb instructions // and plant the alternative CompareAndSwap macro-instruction // sequence (which uses ldaxr). - // + // // Of course, the above only applies when we see these signature // configurations. We still want to plant dmb instructions in any // other cases where we may see a MemBarAcquire, MemBarRelease or @@ -1367,7 +1367,7 @@ opcode = parent->Opcode(); return opcode == Op_MemBarRelease; } - + // 2) card mark detection helper // helper predicate which can be used to detect a volatile membar @@ -1383,7 +1383,7 @@ // true // // iii) the node's Mem projection feeds a StoreCM node. - + bool is_card_mark_membar(const MemBarNode *barrier) { if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) { @@ -1402,7 +1402,7 @@ return true; } } - + return false; } @@ -1430,7 +1430,7 @@ // where // || and \\ represent Ctl and Mem feeds via Proj nodes // | \ and / indicate further routing of the Ctl and Mem feeds - // + // // this is the graph we see for non-object stores. however, for a // volatile Object store (StoreN/P) we may see other nodes below the // leading membar because of the need for a GC pre- or post-write @@ -1592,7 +1592,7 @@ // ordering but neither will a releasing store (stlr). 
The latter // guarantees that the object put is visible but does not guarantee // that writes by other threads have also been observed. - // + // // So, returning to the task of translating the object put and the // leading/trailing membar nodes: what do the non-normal node graph // look like for these 2 special cases? and how can we determine the @@ -1731,7 +1731,7 @@ // | | | | // C | M | M | M | // \ | | / - // . . . + // . . . // (post write subtree elided) // . . . // C \ M / @@ -1812,12 +1812,12 @@ // | | | / / // | Region . . . Phi[M] _____/ // | / | / - // | | / + // | | / // | . . . . . . | / // | / | / // Region | | Phi[M] // | | | / Bot - // \ MergeMem + // \ MergeMem // \ / // MemBarVolatile // @@ -1858,7 +1858,7 @@ // to a trailing barrier via a MergeMem. That feed is either direct // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier // memory flow (for G1). - // + // // The predicates controlling generation of instructions for store // and barrier nodes employ a few simple helper functions (described // below) which identify the presence or absence of all these @@ -2112,8 +2112,8 @@ x = x->in(MemNode::Memory); } else { // the merge should get its Bottom mem feed from the leading membar - x = mm->in(Compile::AliasIdxBot); - } + x = mm->in(Compile::AliasIdxBot); + } // ensure this is a non control projection if (!x->is_Proj() || x->is_CFG()) { @@ -2190,12 +2190,12 @@ // . . . // | // MemBarVolatile (card mark) - // | | + // | | // | StoreCM // | | // | . . . - // Bot | / - // MergeMem + // Bot | / + // MergeMem // | // | // MemBarVolatile {trailing} @@ -2203,10 +2203,10 @@ // 2) // MemBarRelease/CPUOrder (leading) // | - // | + // | // |\ . . . - // | \ | - // | \ MemBarVolatile (card mark) + // | \ | + // | \ MemBarVolatile (card mark) // | \ | | // \ \ | StoreCM . . . // \ \ | @@ -2231,7 +2231,7 @@ // | \ \ | StoreCM . . . // | \ \ | // \ \ Phi - // \ \ / + // \ \ / // \ Phi // \ / // Phi . . . 
@@ -2506,7 +2506,7 @@ return (x->is_Load() && x->as_Load()->is_acquire()); } - + // now check for an unsafe volatile get // need to check for @@ -2644,7 +2644,7 @@ } membar = child_membar(membar); - + if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; } @@ -2703,7 +2703,7 @@ // first we check if this is part of a card mark. if so then we have // to generate a StoreLoad barrier - + if (is_card_mark_membar(mbvol)) { return false; } @@ -2769,7 +2769,7 @@ if (!is_card_mark_membar(mbvol)) { return true; } - + // we found a card mark -- just make sure we have a trailing barrier return (card_mark_to_trailing(mbvol) != NULL); @@ -2808,7 +2808,7 @@ assert(barrier->Opcode() == Op_MemBarCPUOrder, "CAS not fed by cpuorder membar!"); - + MemBarNode *b = parent_membar(barrier); assert ((b != NULL && b->Opcode() == Op_MemBarRelease), "CAS not fed by cpuorder+release membar pair!"); @@ -3463,6 +3463,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. 
+} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -4663,7 +4674,7 @@ call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); } if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } @@ -4671,7 +4682,7 @@ // Emit stub for static call address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } @@ -4681,7 +4692,7 @@ MacroAssembler _masm(&cbuf); address call = __ ic_call((address)$meth$$method); if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } %} @@ -4706,7 +4717,7 @@ if (cb) { address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } else { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -41,7 +41,9 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - ce->store_parameter(_method->as_register(), 1); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(rscratch1, m); + ce->store_parameter(rscratch1, 1); ce->store_parameter(_bci, 0); __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); ce->add_call_info_here(_info); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ 
b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -70,6 +70,7 @@ LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r0_opr; } LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -73,6 +73,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -29,16 +29,16 @@ #include "runtime/sharedRuntime.hpp" #include "vmreg_aarch64.inline.hpp" -jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { +jint CodeInstaller::pd_next_offset(NativeInstruction* 
inst, jint pc_offset, Handle method, TRAPS) { Unimplemented(); return 0; } -void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { Unimplemented(); } -void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { Unimplemented(); } @@ -46,20 +46,20 @@ Unimplemented(); } -void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { Unimplemented(); } -void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { +void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) { Unimplemented(); } -void CodeInstaller::pd_relocate_poll(address pc, jint mark) { +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { Unimplemented(); } // convert JVMCI register indices (as used in oop maps) to HotSpot registers -VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { return NULL; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -91,20 +91,18 @@ unsigned offset_lo = dest & 0xfff; offset = adr_page - pc_page; - // We handle 3 types of PC relative addressing + // We handle 4 types of PC relative addressing // 1 - adrp Rx, target_page // ldr/str Ry, [Rx, #offset_in_page] // 2 - adrp Rx, target_page // add Ry, Rx, #offset_in_page // 3 - adrp Rx, target_page (page aligned reloc, offset == 0) - // In the first 2 cases we must check that Rx is the same in the adrp and the - // 
subsequent ldr/str or add instruction. Otherwise we could accidentally end - // up treating a type 3 relocation as a type 1 or 2 just because it happened - // to be followed by a random unrelated ldr/str or add instruction. - // - // In the case of a type 3 relocation, we know that these are only generated - // for the safepoint polling page, or for the card type byte map base so we - // assert as much and of course that the offset is 0. + // movk Rx, #imm16<<32 + // 4 - adrp Rx, target_page (page aligned reloc, offset == 0) + // In the first 3 cases we must check that Rx is the same in the adrp and the + // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end + // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened + // to be followed by a random unrelated ldr/str, add or movk instruction. // unsigned insn2 = ((unsigned*)branch)[1]; if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && @@ -123,13 +121,13 @@ Instruction_aarch64::patch(branch + sizeof (unsigned), 21, 10, offset_lo); instructions = 2; - } else { - assert((jbyte *)target == - ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base || - target == StubRoutines::crc_table_addr() || - (address)target == os::get_polling_page(), - "adrp must be polling page or byte map base"); - assert(offset_lo == 0, "offset must be 0 for polling page or byte map base"); + } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + // movk #imm16<<32 + Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32); + offset &= (1<<20)-1; + instructions = 2; } } int offset_lo = offset & 3; @@ -212,16 +210,16 @@ // Return the target address for the following sequences // 1 - adrp Rx, target_page // ldr/str Ry, [Rx, #offset_in_page] - // 2 - adrp Rx, target_page ] + // 2 - adrp Rx, target_page // add Ry, Rx, #offset_in_page // 3 - adrp 
Rx, target_page (page aligned reloc, offset == 0) + // movk Rx, #imm16<<32 + // 4 - adrp Rx, target_page (page aligned reloc, offset == 0) // // In the first two cases we check that the register is the same and // return the target_page + the offset within the page. // Otherwise we assume it is a page aligned relocation and return - // the target page only. The only cases this is generated is for - // the safepoint polling page or for the card table byte map base so - // we assert as much. + // the target page only. // unsigned insn2 = ((unsigned*)insn_addr)[1]; if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && @@ -238,10 +236,12 @@ unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10); return address(target_page + byte_offset); } else { - assert((jbyte *)target_page == - ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base || - (address)target_page == os::get_polling_page(), - "adrp must be polling page or byte map base"); + if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + target_page = (target_page & 0xffffffff) | + ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32); + } return (address)target_page; } } else { @@ -3964,22 +3964,26 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { relocInfo::relocType rtype = dest.rspec().reloc()->type(); - if (uabs(pc() - dest.target()) >= (1LL << 32)) { - guarantee(rtype == relocInfo::none - || rtype == relocInfo::external_word_type - || rtype == relocInfo::poll_type - || rtype == relocInfo::poll_return_type, - "can only use a fixed address with an ADRP"); - // Out of range. 
This doesn't happen very often, but we have to - // handle it - mov(reg1, dest); - byte_offset = 0; + unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12; + unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12; + unsigned long dest_page = (unsigned long)dest.target() >> 12; + long offset_low = dest_page - low_page; + long offset_high = dest_page - high_page; + + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + // 8143067: Ensure that the adrp can reach the dest from anywhere within + // the code cache so that if it is relocated we know it will still reach + if (offset_high >= -(1<<20) && offset_low < (1<<20)) { + _adrp(reg1, dest.target()); } else { - InstructionMark im(this); - code_section()->relocate(inst_mark(), dest.rspec()); - byte_offset = (uint64_t)dest.target() & 0xfff; - _adrp(reg1, dest.target()); + unsigned long pc_page = (unsigned long)pc() >> 12; + long offset = dest_page - pc_page; + offset = (offset & ((1<<20)-1)) << 12; + _adrp(reg1, pc()+offset); + movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32); } + byte_offset = (unsigned long)dest.target() & 0xfff; } void MacroAssembler::build_frame(int framesize) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -2384,6 +2384,7 @@ } #endif // ASSERT __ mov(c_rarg0, rthread); + __ mov(c_rarg1, rcpool); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); __ blrt(rscratch1, 1, 0, 1); __ bind(retaddr); @@ -2397,6 +2398,7 @@ // Load UnrollBlock* into rdi __ mov(r5, r0); + __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); Label noException; __ cmpw(rcpool, Deoptimization::Unpack_exception); // Was exception pending? 
__ br(Assembler::NE, noException); @@ -2609,6 +2611,7 @@ // n.b. 2 gp args, 0 fp args, integral return type __ mov(c_rarg0, rthread); + __ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); @@ -2628,6 +2631,16 @@ // move UnrollBlock* into r4 __ mov(r4, r0); +#ifdef ASSERT + { Label L; + __ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + __ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap); + __ br(Assembler::EQ, L); + __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); + __ bind(L); + } +#endif + // Pop all the frames we must move/replace. // // Frame picture (youngest to oldest) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp --- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -61,6 +61,7 @@ define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); // GL: // Detected a problem with unscaled compressed oops and // narrow_oop_use_complex_address() == false. diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -2697,7 +2697,7 @@ // Provide a debugger breakpoint in the frame manager if breakpoints // in osr'd methods are requested. #ifdef COMPILER2 - NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } ) + if (OptoBreakpointOSR) { __ illtrap(); } #endif // Load callee's pointer to locals array from callee's state. 
diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -297,8 +297,16 @@ __ bind(do_float); __ lfs(floatSlot, 0, arg_java); #if defined(LINUX) + // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float + // in the least significant word of an argument slot. +#if defined(VM_LITTLE_ENDIAN) + __ stfs(floatSlot, 0, arg_c); +#else __ stfs(floatSlot, 4, arg_c); +#endif #elif defined(AIX) + // Although AIX runs on big endian CPU, float is in most significant + // word of an argument slot. __ stfs(floatSlot, 0, arg_c); #else #error "unknown OS" diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -29,16 +29,16 @@ #include "runtime/sharedRuntime.hpp" #include "vmreg_ppc.inline.hpp" -jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { Unimplemented(); return 0; } -void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { Unimplemented(); } -void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { Unimplemented(); } @@ -46,20 +46,20 @@ Unimplemented(); } -void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { Unimplemented(); } -void 
CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { +void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) { Unimplemented(); } -void CodeInstaller::pd_relocate_poll(address pc, jint mark) { +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { Unimplemented(); } // convert JVMCI register indices (as used in oop maps) to HotSpot registers -VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { return NULL; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/ppc.ad --- a/hotspot/src/cpu/ppc/vm/ppc.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/ppc.ad Tue Nov 24 10:30:23 2015 +0100 @@ -2064,6 +2064,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -3416,7 +3427,7 @@ // The stub for call to interpreter. address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } } @@ -3465,7 +3476,7 @@ // The stub for call to interpreter. 
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); + ciEnv::current()->record_failure("CodeCache is full"); return; } @@ -6911,7 +6922,7 @@ n_compare->_opnds[0] = op_crx; n_compare->_opnds[1] = op_src; n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR); - + decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode(); n2->add_req(n_region, n_src, n1); n2->_opnds[0] = op_dst; @@ -10588,7 +10599,7 @@ instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{ // Needs matchrule, see cmpDUnordered. - match(Set crx (CmpF src1 src2)); + match(Set crx (CmpF src1 src2)); // no match-rule, false predicate predicate(false); @@ -10697,13 +10708,13 @@ %} instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{ - // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the - // node right before the conditional move using it. + // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the + // node right before the conditional move using it. // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7, // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle // crashed in register allocation where the flags Reg between cmpDUnoredered and a // conditional move was supposed to be spilled. - match(Set crx (CmpD src1 src2)); + match(Set crx (CmpD src1 src2)); // False predicate, shall not be matched. predicate(false); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -753,6 +753,21 @@ // in farg_reg[j] if argument i is the j-th float argument of this call. // case T_FLOAT: +#if defined(LINUX) + // Linux uses ELF ABI. 
Both original ELF and ELFv2 ABIs have float + // in the least significant word of an argument slot. +#if defined(VM_LITTLE_ENDIAN) +#define FLOAT_WORD_OFFSET_IN_SLOT 0 +#else +#define FLOAT_WORD_OFFSET_IN_SLOT 1 +#endif +#elif defined(AIX) + // Although AIX runs on big endian CPU, float is in the most + // significant word of an argument slot. +#define FLOAT_WORD_OFFSET_IN_SLOT 0 +#else +#error "unknown OS" +#endif if (freg < Argument::n_float_register_parameters_c) { // Put float in register ... reg = farg_reg[freg]; @@ -766,14 +781,14 @@ if (arg >= Argument::n_regs_not_on_stack_c) { // ... and on the stack. guarantee(regs2 != NULL, "must pass float in register and stack slot"); - VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1)); + VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT); regs2[i].set1(reg2); stk += inc_stk_for_intfloat; } } else { // Put float on stack. - reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1)); + reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT); stk += inc_stk_for_intfloat; } regs[i].set1(reg); @@ -2802,7 +2817,7 @@ __ set_last_Java_frame(R1_SP, noreg); // With EscapeAnalysis turned on, this call may safepoint! - __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg); address calls_return_pc = __ last_calls_return_pc(); // Set an oopmap for the call site that describes all our saved registers. oop_maps->add_gc_map(calls_return_pc - start, map); @@ -2815,6 +2830,8 @@ // by save_volatile_registers(...). RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes); + // reload the exec mode from the UnrollBlock (it might have changed) + __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg); // In excp_deopt_mode, restore and clear exception oop which we // stored in the thread during exception entry above. 
The exception // oop will be the return value of this stub. @@ -2945,8 +2962,9 @@ __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1); __ mr(klass_index_reg, R3); + __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap); __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), - R16_thread, klass_index_reg); + R16_thread, klass_index_reg, R5_ARG3); // Set an oopmap for the call site. oop_maps->add_gc_map(gc_map_pc - start, map); @@ -2966,6 +2984,12 @@ // stack: (caller_of_deoptee, ...). +#ifdef ASSERT + __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg); + __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap); + __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0); +#endif + // Allocate new interpreter frame(s) and possibly a c2i adapter // frame. push_skeleton_frames(masm, false/*deopt*/, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -94,8 +94,10 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); __ set(_bci, G4); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ set_metadata_constant(m, G5); __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); - __ delayed()->mov_or_nop(_method->as_register(), G5); + __ delayed()->nop(); ce->add_call_info_here(_info); ce->verify_oop_map(_info); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -2812,7 +2812,23 @@ } void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { - fatal("CRC32 intrinsic is not 
implemented on this platform"); + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register table = op->result_opr()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, table); + + __ set(ExternalAddress(StubRoutines::crc_table_addr()), table); + __ not1(crc); + __ clruwu(crc); + __ update_byte_crc32(crc, val, table); + __ not1(crc); + + __ mov(crc, res); } void LIR_Assembler::emit_lock(LIR_OpLock* op) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -68,6 +68,7 @@ LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::Oexception_opr; } LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::Oissuing_pc_opr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } LIR_Opr LIRGenerator::syncTempOpr() { return new_register(T_OBJECT); } LIR_Opr LIRGenerator::getThreadTemp() { return rlock_callee_saved(NOT_LP64(T_INT) LP64_ONLY(T_LONG)); } @@ -785,7 +786,86 @@ } void LIRGenerator::do_update_CRC32(Intrinsic* x) { - fatal("CRC32 intrinsic is not implemented on this platform"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + if (index->is_constant()) { + offset += index->as_constant_ptr()->as_jint(); + index = LIR_OprFact::illegalOpr; + } else if (index->is_register()) { + LIR_Opr tmp2 = new_register(T_LONG); + LIR_Opr tmp3 = new_register(T_LONG); + __ move(base_op, tmp2); + __ move(index, tmp3); + __ add(tmp2, tmp3, tmp2); + base_op = tmp2; + } else { + ShouldNotReachHere(); + } + } + + LIR_Address* a = new LIR_Address(base_op, offset, T_BYTE); + + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } } // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -65,6 +65,7 @@ define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoScheduling, true); define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); #ifdef _LP64 // We need to make sure that all generated code is within diff 
-r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -43,8 +43,9 @@ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); void generate_counter_overflow(Label& Lcontinue); + address generate_CRC32_update_entry(); + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + // Not supported - address generate_CRC32_update_entry() { return NULL; } - address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -29,7 +29,7 @@ #include "runtime/sharedRuntime.hpp" #include "vmreg_sparc.inline.hpp" -jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { if (inst->is_call() || inst->is_jump()) { return pc_offset + NativeCall::instruction_size; } else if (inst->is_call_reg()) { @@ -37,12 +37,12 @@ } else if (inst->is_sethi()) { return pc_offset + NativeFarCall::instruction_size; } else { 
- fatal("unsupported type of instruction for call site"); + JVMCI_ERROR_0("unsupported type of instruction for call site"); return 0; } } -void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { address pc = _instructions->start() + pc_offset; Handle obj = HotSpotObjectConstantImpl::object(constant); jobject value = JNIHandles::make_local(obj()); @@ -52,7 +52,7 @@ RelocationHolder rspec = oop_Relocation::spec(oop_index); _instructions->relocate(pc, rspec, 1); #else - fatal("compressed oop on 32bit"); + JVMCI_ERROR("compressed oop on 32bit"); #endif } else { NativeMovConstReg* move = nativeMovConstReg_at(pc); @@ -66,20 +66,20 @@ } } -void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { address pc = _instructions->start() + pc_offset; if (HotSpotMetaspaceConstantImpl::compressed(constant)) { #ifdef _LP64 NativeMovConstReg32* move = nativeMovConstReg32_at(pc); - narrowKlass narrowOop = record_narrow_metadata_reference(constant); + narrowKlass narrowOop = record_narrow_metadata_reference(constant, CHECK); move->set_data((intptr_t)narrowOop); TRACE_jvmci_3("relocating (narrow metaspace constant) at %p/%p", pc, narrowOop); #else - fatal("compressed Klass* on 32bit"); + JVMCI_ERROR("compressed Klass* on 32bit"); #endif } else { NativeMovConstReg* move = nativeMovConstReg_at(pc); - Metadata* reference = record_metadata_reference(constant); + Metadata* reference = record_metadata_reference(constant, CHECK); move->set_data((intptr_t)reference); TRACE_jvmci_3("relocating (metaspace constant) at %p/%p", pc, reference); } @@ -106,7 +106,7 @@ } } -void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { 
address pc = (address) inst; if (inst->is_call()) { NativeCall* call = nativeCall_at(pc); @@ -117,17 +117,17 @@ jump->set_jump_destination((address) foreign_call_destination); _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); } else { - fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc))); + JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); } TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); } -void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { +void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) { #ifdef ASSERT Method* method = NULL; // we need to check, this might also be an unresolved method if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { - method = getMethodFromHotSpotMethod(hotspot_method); + method = getMethodFromHotSpotMethod(hotspot_method()); } #endif switch (_next_call_type) { @@ -156,33 +156,33 @@ break; } default: - fatal("invalid _next_call_type value"); + JVMCI_ERROR("invalid _next_call_type value"); break; } } -void CodeInstaller::pd_relocate_poll(address pc, jint mark) { +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { switch (mark) { case POLL_NEAR: - fatal("unimplemented"); + JVMCI_ERROR("unimplemented"); break; case POLL_FAR: _instructions->relocate(pc, relocInfo::poll_type); break; case POLL_RETURN_NEAR: - fatal("unimplemented"); + JVMCI_ERROR("unimplemented"); break; case POLL_RETURN_FAR: _instructions->relocate(pc, relocInfo::poll_return_type); break; default: - fatal("invalid mark value"); + JVMCI_ERROR("invalid mark value"); break; } } // convert JVMCI register indices (as used in oop maps) to HotSpot registers -VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { // JVMCI Registers are numbered as follows: // 0..31: Thirty-two General Purpose registers (CPU Registers) // 
32..63: Thirty-two single precision float registers @@ -199,7 +199,7 @@ } else if(jvmci_reg < 112) { floatRegisterNumber = 4 * (jvmci_reg - 96); } else { - fatal("Unknown jvmci register"); + JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); } return as_FloatRegister(floatRegisterNumber)->as_VMReg(); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -4771,3 +4771,243 @@ movdtox(src, tmp1); reverse_bytes_32(tmp1, dst, tmp2); } + +void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset) { + xmulx(xcrc_hi, xK_hi, xtmp_lo); + xmulxhi(xcrc_hi, xK_hi, xtmp_hi); + xmulxhi(xcrc_lo, xK_lo, xcrc_hi); + xmulx(xcrc_lo, xK_lo, xcrc_lo); + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); + ldxl(buf, G0, xtmp_lo); + inc(buf, 8); + ldxl(buf, G0, xtmp_hi); + inc(buf, 8); + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); +} + +void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo) { + mov(xcrc_lo, xtmp_lo); + mov(xcrc_hi, xtmp_hi); + xmulx(xtmp_hi, xK_hi, xtmp_lo); + xmulxhi(xtmp_hi, xK_hi, xtmp_hi); + xmulxhi(xcrc_lo, xK_lo, xcrc_hi); + xmulx(xcrc_lo, xK_lo, xcrc_lo); + xor3(xcrc_lo, xbuf_lo, xcrc_lo); + xor3(xcrc_hi, xbuf_hi, xcrc_hi); + xor3(xcrc_lo, xtmp_lo, xcrc_lo); + xor3(xcrc_hi, xtmp_hi, xcrc_hi); +} + +void MacroAssembler::fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp) { + and3(xcrc, 0xFF, tmp); + sllx(tmp, 2, tmp); + lduw(table, tmp, xtmp); + srlx(xcrc, 8, xcrc); + xor3(xtmp, xcrc, xcrc); +} + +void MacroAssembler::fold_8bit_crc32(Register crc, Register 
table, Register tmp) { + and3(crc, 0xFF, tmp); + srlx(crc, 8, crc); + sllx(tmp, 2, tmp); + lduw(table, tmp, tmp); + xor3(tmp, crc, crc); +} + +#define CRC32_TMP_REG_NUM 18 + +#define CRC32_CONST_64 0x163cd6124 +#define CRC32_CONST_96 0x0ccaa009e +#define CRC32_CONST_160 0x1751997d0 +#define CRC32_CONST_480 0x1c6e41596 +#define CRC32_CONST_544 0x154442bd4 + +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) { + + Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check; + Label L_main_loop_prologue; + Label L_fold_512b, L_fold_512b_loop, L_fold_128b; + Label L_fold_tail, L_fold_tail_loop; + Label L_8byte_fold_loop, L_8byte_fold_check; + + const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3}; + + Register const_64 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_96 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_160 = tmp[CRC32_TMP_REG_NUM-2]; + Register const_480 = tmp[CRC32_TMP_REG_NUM-1]; + Register const_544 = tmp[CRC32_TMP_REG_NUM-2]; + + set(ExternalAddress(StubRoutines::crc_table_addr()), table); + + not1(crc); // ~c + clruwu(crc); // clear upper 32 bits of crc + + // Check if below cutoff, proceed directly to cleanup code + mov(31, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check); + + // Align buffer to 8 byte boundary + mov(8, O5); + and3(buf, 0x7, O4); + sub(O5, O4, O5); + and3(O5, 0x7, O5); + sub(len, O5, len); + ba(L_align_check); + delayed()->nop(); + + // Alignment loop, table look up method for up to 7 bytes + bind(L_align_loop); + ldub(buf, 0, O4); + inc(buf); + dec(O5); + xor3(O4, crc, O4); + and3(O4, 0xFF, O4); + sllx(O4, 2, O4); + lduw(table, O4, O4); + srlx(crc, 8, crc); + xor3(O4, crc, crc); + bind(L_align_check); + nop(); + cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_align_loop); + + // Aligned on 64-bit (8-byte) boundary at this point + // Check if still above cutoff (31-bytes) + 
mov(31, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check); + // At least 32 bytes left to process + + // Free up registers by storing them to FP registers + for (int i = 0; i < CRC32_TMP_REG_NUM; i++) { + movxtod(tmp[i], as_FloatRegister(2*i)); + } + + // Determine which loop to enter + // Shared prologue + ldxl(buf, G0, tmp[0]); + inc(buf, 8); + ldxl(buf, G0, tmp[1]); + inc(buf, 8); + xor3(tmp[0], crc, tmp[0]); // Fold CRC into first few bytes + and3(crc, 0, crc); // Clear out the crc register + // Main loop needs 128-bytes at least + mov(128, G4); + mov(64, tmp[2]); + cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_main_loop_prologue); + // Less than 64 bytes + nop(); + cmp_and_br_short(len, tmp[2], Assembler::lessUnsigned, Assembler::pt, L_fold_tail); + // Between 64 and 127 bytes + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[4], tmp[5], buf, 16); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[6], tmp[7], buf, 32); + dec(len, 48); + ba(L_fold_tail); + delayed()->nop(); + + bind(L_main_loop_prologue); + for (int i = 2; i < 8; i++) { + ldxl(buf, G0, tmp[i]); + inc(buf, 8); + } + + // Fold total 512 bits of polynomial on each iteration, + // 128 bits per each of 4 parallel streams + set64(CRC32_CONST_480, const_480, tmp[8]); + set64(CRC32_CONST_544, const_544, tmp[9]); + + mov(128, G4); + bind(L_fold_512b_loop); + fold_128bit_crc32(tmp[1], tmp[0], const_480, const_544, tmp[9], tmp[8], buf, 0); + fold_128bit_crc32(tmp[3], tmp[2], const_480, const_544, tmp[11], tmp[10], buf, 16); + fold_128bit_crc32(tmp[5], tmp[4], const_480, const_544, tmp[13], tmp[12], buf, 32); + fold_128bit_crc32(tmp[7], tmp[6], const_480, const_544, tmp[15], tmp[14], buf, 64); + dec(len, 64); + cmp_and_br_short(len, G4, 
Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_512b_loop); + + // Fold 512 bits to 128 bits + bind(L_fold_512b); + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[3], tmp[2]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[5], tmp[4]); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[7], tmp[6]); + dec(len, 48); + + // Fold the rest of 128 bits data chunks + bind(L_fold_tail); + mov(32, G4); + cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_fold_128b); + + set64(CRC32_CONST_96, const_96, tmp[8]); + set64(CRC32_CONST_160, const_160, tmp[9]); + + bind(L_fold_tail_loop); + fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0); + sub(len, 16, len); + cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_tail_loop); + + // Fold the 128 bits in tmps 0 - 1 into tmp 1 + bind(L_fold_128b); + + set64(CRC32_CONST_64, const_64, tmp[4]); + + xmulx(const_64, tmp[0], tmp[2]); + xmulxhi(const_64, tmp[0], tmp[3]); + + srl(tmp[2], G0, tmp[4]); + xmulx(const_64, tmp[4], tmp[4]); + + srlx(tmp[2], 32, tmp[2]); + sllx(tmp[3], 32, tmp[3]); + or3(tmp[2], tmp[3], tmp[2]); + + xor3(tmp[4], tmp[1], tmp[4]); + xor3(tmp[4], tmp[2], tmp[1]); + dec(len, 8); + + // Use table lookup for the 8 bytes left in tmp[1] + dec(len, 8); + + // 8 8-bit folds to compute 32-bit CRC. 
+ for (int j = 0; j < 4; j++) { + fold_8bit_crc32(tmp[1], table, tmp[2], tmp[3]); + } + srl(tmp[1], G0, crc); // move 32 bits to general register + for (int j = 0; j < 4; j++) { + fold_8bit_crc32(crc, table, tmp[3]); + } + + bind(L_8byte_fold_check); + + // Restore int registers saved in FP registers + for (int i = 0; i < CRC32_TMP_REG_NUM; i++) { + movdtox(as_FloatRegister(2*i), tmp[i]); + } + + ba(L_cleanup_check); + delayed()->nop(); + + // Table look-up method for the remaining few bytes + bind(L_cleanup_loop); + ldub(buf, 0, O4); + inc(buf); + dec(len); + xor3(O4, crc, O4); + and3(O4, 0xFF, O4); + sllx(O4, 2, O4); + lduw(table, O4, O4); + srlx(crc, 8, crc); + xor3(O4, crc, crc); + bind(L_cleanup_check); + nop(); + cmp_and_br_short(len, 0, Assembler::greaterUnsigned, Assembler::pt, L_cleanup_loop); + + not1(crc); +} + diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -904,7 +904,9 @@ inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); // little-endian - inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } + inline void lduwl(Register s1, Register s2, Register d) { lduwa(s1, s2, ASI_PRIMARY_LITTLE, d); } + inline void ldswl(Register s1, Register s2, Register d) { ldswa(s1, s2, ASI_PRIMARY_LITTLE, d);} + inline void ldxl( Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); } // membar psuedo instruction. takes into account target memory model. 
@@ -1469,6 +1471,15 @@ void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2); void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2); + // CRC32 code for java.util.zip.CRC32::updateBytes0() intrinsic. + void kernel_crc32(Register crc, Register buf, Register len, Register table); + // Fold 128-bit data chunk + void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset); + void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo); + // Fold 8-bit data + void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp); + void fold_8bit_crc32(Register crc, Register table, Register tmp); + #undef VIRTUAL }; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -3036,6 +3036,7 @@ __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode); __ mov(G2_thread, O0); + __ mov(L0deopt_mode, O2); __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)); __ delayed()->nop(); oop_maps->add_gc_map( __ offset()-start, map->deep_copy()); @@ -3121,6 +3122,7 @@ // do the call by hand so we can get the oopmap __ mov(G2_thread, L7_thread_cache); + __ mov(L0deopt_mode, O1); __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); __ delayed()->mov(G2_thread, O0); @@ -3146,6 +3148,7 @@ RegisterSaver::restore_result_registers(masm); + __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), G4deopt_mode); Label noException; __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, 
noException); @@ -3269,7 +3272,8 @@ __ save_frame(0); __ set_last_Java_frame(SP, noreg); __ mov(I0, O2klass_index); - __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index); + __ mov(Deoptimization::Unpack_uncommon_trap, O3); // exec mode + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index, O3); __ reset_last_Java_frame(); __ mov(O0, O2UnrollBlock->after_save()); __ restore(); @@ -3278,6 +3282,15 @@ __ mov(O2UnrollBlock, O2UnrollBlock->after_save()); __ restore(); +#ifdef ASSERT + { Label L; + __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), O1); + __ cmp_and_br_short(O1, Deoptimization::Unpack_uncommon_trap, Assembler::equal, Assembler::pt, L); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + // Allocate new interpreter frame(s) and possible c2i adapter frame make_new_frames(masm, false); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/sparc.ad --- a/hotspot/src/cpu/sparc/vm/sparc.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/sparc.ad Tue Nov 24 10:30:23 2015 +0100 @@ -1860,6 +1860,17 @@ return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. 
+} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -1905,7 +1916,7 @@ } // Current (2013) SPARC platforms need to read original key -// to construct decryption expanded key +// to construct decryption expanded key const bool Matcher::pass_original_key_for_aes() { return true; } @@ -2612,7 +2623,7 @@ if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) { ciEnv::current()->record_failure("CodeCache is full"); return; - } + } } %} @@ -3132,10 +3143,10 @@ // AVOID_NONE - instruction can be placed anywhere // AVOID_BEFORE - instruction cannot be placed after an // instruction with MachNode::AVOID_AFTER -// AVOID_AFTER - the next instruction cannot be the one +// AVOID_AFTER - the next instruction cannot be the one // with MachNode::AVOID_BEFORE -// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at -// the same time +// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at +// the same time ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE); ins_attrib ins_short_branch(0); // Required flag: is this instruction a diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -5292,6 +5292,38 @@ return start; } +/** + * Arguments: + * + * Inputs: + * O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "need VIS3 instructions"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // length + const Register table = O3; // crc_table address (reuse register) + 
+ __ kernel_crc32(crc, buf, len, table); + + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -5324,6 +5356,12 @@ // Build this early so it's available for the interpreter. StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); + + if (UseCRC32Intrinsics) { + // set table address before stub generation which uses it + StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -52,3 +52,98 @@ address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows); address StubRoutines::Sparc::_partial_subtype_check = NULL; + +uint64_t StubRoutines::Sparc::_crc_by128_masks[] = +{ + /* The fields in this structure are arranged so that they can be + * picked up two at a time with 128-bit loads. + * + * Because of flipped bit order for this CRC polynomial + * the constant for X**N is left-shifted by 1. This is because + * a 64 x 64 polynomial multiply produces a 127-bit result + * but the highest term is always aligned to bit 0 in the container. 
+ * Pre-shifting by one fixes this, at the cost of potentially making + * the 32-bit constant no longer fit in a 32-bit container (thus the + * use of uint64_t, though this is also the size used by the carry- + * less multiply instruction. + * + * In addition, the flipped bit order and highest-term-at-least-bit + * multiply changes the constants used. The 96-bit result will be + * aligned to the high-term end of the target 128-bit container, + * not the low-term end; that is, instead of a 512-bit or 576-bit fold, + * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold. + * + * This cause additional problems in the 128-to-64-bit reduction; see the + * code for details. By storing a mask in the otherwise unused half of + * a 128-bit constant, bits can be cleared before multiplication without + * storing and reloading. Note that staying on a 128-bit datapath means + * that some data is uselessly stored and some unused data is intersected + * with an irrelevant constant. + */ + + ((uint64_t) 0xffffffffUL), /* low of K_M_64 */ + ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64 */ + ((uint64_t) 0xba8ccbe8U << 1), /* low of K_160_96 */ + ((uint64_t) 0x6655004fU << 1), /* high of K_160_96 */ + ((uint64_t) 0xaa2215eaU << 1), /* low of K_544_480 */ + ((uint64_t) 0xe3720acbU << 1) /* high of K_544_480 */ +}; + +/** + * crc_table[] from jdk/src/java.base/share/native/libzip/zlib-1.2.8/crc32.h + */ +juint StubRoutines::Sparc::_crc_table[] = +{ + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 
0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +}; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -53,6 +53,9 @@ static address _flush_callers_register_windows_entry; static address _partial_subtype_check; + // masks and table for CRC32 + static uint64_t _crc_by128_masks[]; + static juint _crc_table[]; public: // test assembler stop routine by setting registers @@ -65,6 +68,8 @@ static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); } static address partial_subtype_check() { return _partial_subtype_check; } + + static address crc_by128_masks_addr() { return 
(address)_crc_by128_masks; } }; #endif // CPU_SPARC_VM_STUBROUTINES_SPARC_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -803,6 +803,106 @@ return NULL; } +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + Label L_slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); + __ set(SafepointSynchronize::_not_synchronized, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); + + // Load parameters + const Register crc = O0; // initial crc + const Register val = O1; // byte to update with + const Register table = O2; // address of 256-entry lookup table + + __ ldub(Gargs, 3, val); + __ lduw(Gargs, 8, crc); + + __ set(ExternalAddress(StubRoutines::crc_table_addr()), table); + + __ not1(crc); // ~crc + __ clruwu(crc); + __ update_byte_crc32(crc, val, table); + __ not1(crc); // ~crc + + // result in O0 + __ retl(); + __ delayed()->nop(); + + // generate a vanilla native entry as the slow path + __ bind(L_slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + + if (UseCRC32Intrinsics) { + address 
entry = __ pc(); + + Label L_slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); + __ set(SafepointSynchronize::_not_synchronized, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); + + // Load parameters from the stack + const Register crc = O0; // initial crc + const Register buf = O1; // source java byte array address + const Register len = O2; // len + const Register offset = O3; // offset + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ lduw(Gargs, 0, len); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 32, crc); + __ add(buf, offset, buf); + } else { + __ lduw(Gargs, 0, len); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 24, crc); + __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size + __ add(buf ,offset, buf); + } + + // Call the crc32 kernel + __ MacroAssembler::save_thread(L7_thread_cache); + __ kernel_crc32(crc, buf, len, O3); + __ MacroAssembler::restore_thread(L7_thread_cache); + + // result in O0 + __ retl(); + __ delayed()->nop(); + + // generate a vanilla native entry as the slow path + __ bind(L_slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + // // Interpreter stub for calling a native method. 
(asm interpreter) // This sets up a somewhat different looking stack for calling the native method diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -229,35 +229,35 @@ // SPARC T4 and above should have support for AES instructions if (has_aes()) { - if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3 - if (FLAG_IS_DEFAULT(UseAES)) { - FLAG_SET_DEFAULT(UseAES, true); + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + if (!UseAES) { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); } - if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { - FLAG_SET_DEFAULT(UseAESIntrinsics, true); - } - // we disable both the AES flags if either of them is disabled on the command line - if (!UseAES || !UseAESIntrinsics) { - FLAG_SET_DEFAULT(UseAES, false); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } else { + // The AES intrinsic stubs require AES instruction support (of course) + // but also require VIS3 mode or higher for the instructions they use. + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("SPARC AES intrinsics require VIS3 instructions. Intrinsics will be disabled."); + } FLAG_SET_DEFAULT(UseAESIntrinsics, false); } - } else { - if (UseAES || UseAESIntrinsics) { - warning("SPARC AES intrinsics require VIS3 instruction support. 
Intrinsics will be disabled."); - if (UseAES) { - FLAG_SET_DEFAULT(UseAES, false); - } - if (UseAESIntrinsics) { - FLAG_SET_DEFAULT(UseAESIntrinsics, false); - } - } } } else if (UseAES || UseAESIntrinsics) { - warning("AES instructions are not available on this CPU"); - if (UseAES) { + if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseAES, false); } - if (UseAESIntrinsics) { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseAESIntrinsics, false); } } @@ -347,6 +347,15 @@ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); } + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); + } + } else if (UseCRC32Intrinsics) { + warning("SPARC CRC32 intrinsics require VIS3 instructions support. Intrinsics will be disabled"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; @@ -358,7 +367,6 @@ FLAG_SET_DEFAULT(UseUnalignedAccesses, false); } -#ifndef PRODUCT if (PrintMiscellaneous && Verbose) { tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); @@ -391,7 +399,6 @@ tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); } } -#endif // PRODUCT } void VM_Version::print_features() { @@ -400,7 +407,7 @@ int VM_Version::determine_features() { if (UseV8InstrsOnly) { - NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");) + if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-V8"); } return generic_v8_m; } @@ -416,12 +423,12 @@ if (is_T_family(features)) { // Happy to accomodate... 
} else { - NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");) + if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Niagara"); } features |= T_family_m; } } else { if (is_T_family(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) { - NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");) + if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Not-Niagara"); } features &= ~(T_family_m | T1_model_m); } else { // Happy to accomodate... diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/assembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -313,7 +313,7 @@ switch (cur_tuple_type) { case EVEX_FV: if ((cur_encoding & VEX_W) == VEX_W) { - mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2; } else { mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; } @@ -394,25 +394,27 @@ int mod_idx = 0; // We will test if the displacement fits the compressed format and if so // apply the compression to the displacment iff the result is8bit. - if (VM_Version::supports_evex() && _is_evex_instruction) { - switch (_tuple_type) { + if (VM_Version::supports_evex() && (_attributes != NULL) && _attributes->is_evex_instruction()) { + int evex_encoding = _attributes->get_evex_encoding(); + int tuple_type = _attributes->get_tuple_type(); + switch (tuple_type) { case EVEX_FV: - if ((_evex_encoding & VEX_W) == VEX_W) { - mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + if ((evex_encoding & VEX_W) == VEX_W) { + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2; } else { - mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 
1 : 0; } break; case EVEX_HV: - mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; + mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; break; case EVEX_FVM: break; case EVEX_T1S: - switch (_input_size_in_bits) { + switch (_attributes->get_input_size()) { case EVEX_8bit: break; @@ -433,7 +435,7 @@ case EVEX_T1F: case EVEX_T2: case EVEX_T4: - mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0; + mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0; break; case EVEX_T8: @@ -459,8 +461,9 @@ break; } - if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) { - int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len]; + int vector_len = _attributes->get_vector_len(); + if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) { + int disp_factor = tuple_table[tuple_type + mod_idx][vector_len]; if ((disp % disp_factor) == 0) { int new_disp = disp / disp_factor; if (is8bit(new_disp)) { @@ -591,7 +594,6 @@ emit_data(disp, rspec, disp32_operand); } } - _is_evex_instruction = false; } void Assembler::emit_operand(XMMRegister reg, Register base, Register index, @@ -770,7 +772,7 @@ case 0x55: // andnps case 0x56: // orps case 0x57: // xorps - case 0x59: //mulpd + case 0x59: // mulpd case 0x6E: // movd case 0x7E: // movd case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush @@ -1234,51 +1236,53 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addsd(XMMRegister dst, Address src) { 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::aesdec(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* 
legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDE); emit_operand(dst, src); } void Assembler::aesdec(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDE); emit_int8(0xC0 | encode); } @@ -1286,16 +1290,16 @@ void Assembler::aesdeclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDF); emit_operand(dst, src); } void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDF); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1303,16 +1307,16 @@ void 
Assembler::aesenc(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDC); emit_operand(dst, src); } void Assembler::aesenc(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDC); emit_int8(0xC0 | encode); } @@ -1320,16 +1324,16 @@ void Assembler::aesenclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDD); emit_operand(dst, src); } void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* 
no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xDD); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1361,15 +1365,17 @@ void Assembler::andnl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnl(Register dst, Register src1, Address src2) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(dst, src1, src2); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -1396,45 +1402,51 @@ void Assembler::blsil(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsil(Register dst, Address src) { - InstructionMark im(this); 
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rbx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsmskl(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rdx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned 
char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsrl(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_legacy(rcx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -1581,36 +1593,38 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);; + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_operand(dst, src); } void Assembler::comisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + 
emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::comiss(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_operand(dst, src); } void Assembler::comiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cpuid() { @@ -1699,100 +1713,113 @@ void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE6); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true); + 
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5B); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsd2ss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1F; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_operand(dst, src); } void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex()); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, 
as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x2A); + emit_operand(dst, src); } void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + 
emit_int8(0x2A); + emit_operand(dst, src); } void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5A); + emit_operand(dst, src); } void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + 
int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1807,36 +1834,38 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + 
emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::divss(XMMRegister dst, Address src) { - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::emms() { @@ -2082,36 +2111,26 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512novl()) { - int vector_len = AVX_512bit; - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); - emit_int8(0x28); - emit_int8((unsigned char)(0xC0 | encode)); - } else if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); - } + int vector_len = VM_Version::supports_avx512novl() ? 
AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_avx512novl()) { - int vector_len = AVX_512bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len); - emit_int8(0x28); - emit_int8((unsigned char)(0xC0 | encode)); - } else { - emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE); - } + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movlhps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2125,39 +2144,53 @@ } void Assembler::movddup(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F, - /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false); + int vector_len = 
VM_Version::supports_evex() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x12); emit_int8(0xC0 | encode); - +} + +void Assembler::kmovwl(KRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x92); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::kmovdl(KRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_evex(), "")); + VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE; + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x92); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::kmovql(KRegister dst, KRegister src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, - /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::kmovql(KRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int dst_enc = dst->encoding(); - int nds_enc = 0; - vex_prefix(src, nds_enc, dst_enc, 
VEX_SIMD_NONE, - VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_operand((Register)dst, src); } void Assembler::kmovql(Address dst, KRegister src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - int src_enc = src->encoding(); - int nds_enc = 0; - vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE, - VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x90); emit_operand((Register)src, dst); } @@ -2165,24 +2198,45 @@ void Assembler::kmovql(KRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_evex(), "")); VexSimdPrefix pre = !_legacy_mode_bw ? 
VEX_SIMD_F2 : VEX_SIMD_NONE; - int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0x92); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::kmovdl(KRegister dst, Register src) { +// This instruction produces ZF or CF flags +void Assembler::kortestbl(KRegister src1, KRegister src2) { + NOT_LP64(assert(VM_Version::supports_avx512dq(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x98); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// This instruction produces ZF or CF flags +void Assembler::kortestwl(KRegister src1, KRegister src2) { NOT_LP64(assert(VM_Version::supports_evex(), "")); - VexSimdPrefix pre = !_legacy_mode_bw ? 
VEX_SIMD_F2 : VEX_SIMD_NONE; - int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true); - emit_int8((unsigned char)0x92); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::kmovwl(KRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_evex(), "")); - int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true); - emit_int8((unsigned char)0x92); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x98); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// This instruction produces ZF or CF flags +void Assembler::kortestdl(KRegister src1, KRegister src2) { + NOT_LP64(assert(VM_Version::supports_avx512bw(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x98); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// This instruction produces ZF or CF flags +void Assembler::kortestql(KRegister src1, KRegister src2) { + NOT_LP64(assert(VM_Version::supports_avx512bw(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x98); emit_int8((unsigned char)(0xC0 | encode)); } @@ -2205,190 +2259,231 @@ void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ 
false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix - int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_operand(dst, src); } void Assembler::movdl(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* 
input_size_in_bits */ EVEX_32bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_operand(src, dst); } void Assembler::movdqa(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdqa(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); } 
void Assembler::movdqu(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdqu(Address dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } // Move Unaligned 256bit Vector void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmovdqu(XMMRegister dst, Address src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - InstructionMark im(this); - int vector_len = AVX_256bit; - vex_prefix(dst, xnoreg, src, 
VEX_SIMD_F3, vector_len); + InstructionMark im(this); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::vmovdqu(Address dst, XMMRegister src) { - _instruction_uses_vl = true; assert(UseAVX > 0, ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - InstructionMark im(this); - int vector_len = AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); // swap src<->dst for encoding assert(src != xnoreg, "sanity"); - vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) +void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type 
*/ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); +} + +void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(src != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x7F); + emit_operand(src, dst); +} + +void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); +} + +void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(src != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* 
uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x7F); + emit_operand(src, dst); +} void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + assert(VM_Version::supports_evex(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - // swap src<->dst 
for encoding - vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 0, ""); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 2, ""); - InstructionMark im(this); - _tuple_type = EVEX_FVM; - vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len); + assert(VM_Version::supports_evex(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 2, ""); - InstructionMark 
im(this); + assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); - _tuple_type = EVEX_FVM; - // swap src<->dst for encoding - vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } @@ -2434,13 +2529,12 @@ // The selection is done in MacroAssembler::movdbl() and movflt(). void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x12); + emit_operand(dst, src); } void Assembler::movq( MMXRegister dst, Address src ) { @@ -2466,13 +2560,9 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true); - } else { - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ 
false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_operand(dst, src); } @@ -2480,14 +2570,9 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ true); - } else { - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD6); emit_operand(src, dst); } @@ -2510,60 +2595,56 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } else { - emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); - } 
else { - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_operand(dst, src); } void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2); - } else { - simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionMark im(this); + 
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x10); + emit_operand(dst, src); } void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x11); emit_operand(src, dst); } @@ -2655,36 +2736,38 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x59, 
dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::negl(Register dst) { @@ -2985,28 +3068,35 @@ void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* 
no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_operand(dst, src); } void Assembler::packuswb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "some form of AVX must be enabled"); - emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x67); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x00); emit_int8(0xC0 | encode); emit_int8(imm8); @@ -3020,8 +3110,8 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A, - /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x61); emit_operand(dst, src); emit_int8(imm8); @@ -3029,46 +3119,162 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x61); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); } +// 
In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x74); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x74); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, kdst is written the mask used to process the equal components +void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x74); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x75, dst, src, VEX_SIMD_66, - false, (VM_Version::supports_avx512dq() == false)); -} - + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(UseAVX > 0, "some form of AVX must be enabled"); - emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len, - false, (VM_Version::supports_avx512dq() == false)); + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, kdst is written the mask used to process the equal components +void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x75); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x76); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x76); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, kdst is written the mask used to process the equal components +void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x76); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse4_1(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x29); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst +void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x29); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, kdst is written the mask used to process the equal components +void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x29); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// In this context, kdst is written the mask used to process the equal components +void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int dst_enc = kdst->encoding(); + vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x29); + emit_operand(as_Register(dst_enc), src); } void Assembler::pmovmskb(Register dst, XMMRegister src) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F, - false, AVX_128bit, (VM_Version::supports_avx512dq() == false)); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD7); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmovmskb(Register dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F, true, false); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xD7); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); 
emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3076,8 +3282,8 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x16); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3085,8 +3291,8 @@ void Assembler::pextrw(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC5); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3094,8 +3300,8 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, 
as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3103,8 +3309,8 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3112,8 +3318,8 @@ void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse2(), ""); - int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC4); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); @@ -3121,29 +3327,29 @@ void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_HVM; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + 
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_operand(dst, src); } void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_int8((unsigned char)(0xC0 | encode)); } -void Assembler::vpmovzxbw(XMMRegister dst, Address src) { +void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); - bool vector256 = true; assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x30); emit_operand(dst, src); } @@ -3246,43 +3452,41 @@ void Assembler::pshufb(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_ssse3(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, 
VEX_OPCODE_0F_38, &attributes); emit_int8(0x00); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pshufb(XMMRegister dst, Address src) { assert(VM_Version::supports_ssse3(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x00); emit_operand(dst, src); } void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { - _instruction_uses_vl = true; assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); + int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x70); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(mode & 0xFF); } void Assembler::pshufd(XMMRegister dst, Address src, int mode) { - _instruction_uses_vl = true; assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -3291,7 +3495,10 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x70); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(mode & 0xFF); } @@ -3299,12 +3506,10 @@ assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 
bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x70); emit_operand(dst, src); emit_int8(mode & 0xFF); @@ -3313,9 +3518,9 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); // XMM3 is for /3 encoding: 66 0F 73 /3 ib - int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); @@ -3324,9 +3529,9 @@ void Assembler::pslldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); // XMM7 is for /7 encoding: 66 0F 73 /7 ib - int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); @@ -3336,16 +3541,16 @@ assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3353,20 +3558,18 @@ void Assembler::vptest(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); - int vector_len = AVX_256bit; + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* 
uses_vl */ false); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false, - vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_operand(dst, src); } void Assembler::vptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3374,42 +3577,47 @@ void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x60); + emit_operand(dst, src); } void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ 
false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x60); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::punpckldq(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x62, dst, src, VEX_SIMD_66); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x62); + emit_operand(dst, src); } void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x62, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x62); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, 
VEX_OPCODE_0F, &attributes); + emit_int8(0x6C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::push(int32_t imm32) { @@ -3454,16 +3662,18 @@ void Assembler::rcpps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int8(0x53); - emit_int8(0xC0 | encode); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::rcpss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x53); - emit_int8(0xC0 | encode); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::rdtsc() { @@ -3622,27 +3832,28 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void 
Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::std() { @@ -3651,11 +3862,12 @@ void Assembler::sqrtss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::stmxcsr( Address dst) { @@ -3705,38 +3917,38 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - } - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); + InstructionMark 
im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::testb(Register dst, int imm8) { @@ -3765,7 +3977,7 @@ emit_arith(0x85, 0xC0, dst, src); } -void Assembler::testl(Register dst, Address src) { +void Assembler::testl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); emit_int8((unsigned char)0x85); @@ -3792,36 +4004,38 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_operand(dst, src); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); - } else { - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + 
emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_operand(dst, src); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x2E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xabort(int8_t imm8) { @@ -3903,138 +4117,162 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } //====================VECTOR ARITHMETIC===================================== @@ -4042,414 +4280,433 @@ // Float-point vector arithmetic void Assembler::addpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x58, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::addps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); } void Assembler::subpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::subps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned 
char)(0xC0 | encode)); } void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); } void Assembler::mulpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::mulpd(XMMRegister dst, Address src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x59, dst, src, VEX_SIMD_66); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::mulps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister 
src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x59); + emit_operand(dst, src); } void Assembler::divpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::divps(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned 
char)(0xC0 | encode)); } void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5E); + emit_operand(dst, src); } void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x51); + emit_operand(dst, src); } void Assembler::andpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr 
attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* 
legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x54); + emit_operand(dst, src); } void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x15, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x15); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x14, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x14); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void 
Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void 
Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_avx512dq()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x57); + emit_operand(dst, src); } // Integer vector arithmetic void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx() && (vector_len == 0) || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } @@ -4457,280 +4714,324 @@ void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx() && (vector_len == 0) || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::paddq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66); - 
} else { - emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::phaddw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x01); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::phaddd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse3(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x02); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_operand(dst, src); } void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_operand(dst, src); } void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_operand(dst, src); } void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_operand(dst, src); } void Assembler::psubb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, 
dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psubq(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66); - } else { - emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_operand(dst, src); } void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_operand(dst, src); } void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_operand(dst, src); } void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); - } + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_operand(dst, src); } void Assembler::pmullw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pmulld(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, - /* no_mask_reg */ false, VEX_OPCODE_0F_38); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = 
simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 2, "requires some form of AVX"); - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FVM; - } - emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD5); + emit_operand(dst, src); } void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, - VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_operand(dst, src); } void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_64bit; - } - InstructionMark im(this); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, - VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq); + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x40); emit_operand(dst, src); } @@ -4738,29 +5039,29 @@ // Shift packed integers left by specified number of bits. 
void Assembler::psllw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, - /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::pslld(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psllq(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true); + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4768,102 +5069,111 @@ void Assembler::psllw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - 
emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pslld(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psllq(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66); - } else { - emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 71 /6 ib - emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 
+ int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib - emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 73 /6 ib - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); - } + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, 
/* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); } // Shift packed integers logically right by specified number of bits. 
void Assembler::psrlw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 71 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrld(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 72 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrlq(XMMRegister dst, int shift) { - _instruction_uses_vl = true; // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex()); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4871,89 +5181,98 @@ void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrld(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66); - } else { - emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* rex_w */ 
VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 71 /2 ib - emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 72 /2 ib - emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 73 /2 ib - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0x73, 
xmm2, dst, src, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len); - } + int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); - } else { - emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len); - } + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* 
legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xC0 | encode)); } // Shift packed integers arithmetically right by specified number of bits. void Assembler::psraw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x71); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::psrad(XMMRegister dst, int shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 72 /4 ib - int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x72); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); @@ -4961,128 +5280,157 @@ void Assembler::psraw(XMMRegister dst, XMMRegister shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = 
simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::psrad(XMMRegister dst, XMMRegister shift) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE2); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib - emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len); + int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift & 0xFF); } void Assembler::vpsraw(XMMRegister dst, 
XMMRegister src, XMMRegister shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE1); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE2); + emit_int8((unsigned char)(0xC0 | encode)); } // logical operations packed integers void Assembler::pand(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* 
legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_operand(dst, src); } void Assembler::pandn(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66); - } - else { - emit_simd_arith(0xDF, dst, src, VEX_SIMD_66); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::por(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* 
legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_operand(dst, src); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { - _instruction_uses_vl = true; assert(UseAVX > 0, "requires some form of AVX"); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_FV; - _input_size_in_bits = EVEX_32bit; - } - emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_operand(dst, src); } void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5090,45 +5438,38 @@ emit_int8(0x01); } -void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(0x01); -} - -void Assembler::vinsertf64x4h(XMMRegister dst, Address src) { + emit_int8(value & 0x01); +} + +void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1A); emit_operand(dst, src); + // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 128 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into q0 128 bits (0..127) @@ -5139,15 +5480,14 @@ } void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { - assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); - int vector_len = AVX_512bit; + assert(VM_Version::supports_avx(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x00 - insert into q0 128 bits (0..127) @@ -5159,17 +5499,13 @@ void Assembler::vinsertf128h(XMMRegister dst, Address src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x18); emit_operand(dst, src); // 0x01 - insert into upper 128 bits @@ -5178,11 +5514,9 @@ void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5192,16 +5526,12 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); // 0x01 - extract from upper 128 bits @@ -5210,11 +5540,10 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5222,34 +5551,27 @@ emit_int8(0x01); } -void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { +void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 256 bits // 0x01 - insert into upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vinserti128h(XMMRegister dst, Address src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x38); emit_operand(dst, src); // 0x01 - insert into upper 128 bits @@ -5258,11 +5580,9 @@ void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - vector_len = AVX_512bit; - } - int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); + int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits @@ -5272,48 +5592,33 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - int vector_len = AVX_256bit; - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - vector_len = AVX_512bit; - } - InstructionMark im(this); assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit; + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_operand(src, dst); // 0x01 - extract from upper 128 bits emit_int8(0x01); } -void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) { +void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = 
vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x3B); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode; - if (VM_Version::supports_avx512dq()) { - encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); - } else { - encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); - } + InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_int8((unsigned char)(0xC0 | encode)); // 0x01 - extract from bits 255:128 @@ -5322,42 +5627,36 @@ emit_int8(value & 0x3); } -void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) { +void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, 
VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_int8((unsigned char)(0xC0 | encode)); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); -} - -void Assembler::vextractf64x4h(Address dst, XMMRegister src) { + emit_int8(value & 0x1); +} + +void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ true, vector_len); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_operand(src, dst); + // 0x00 - extract from lower 256 bits // 0x01 - extract from upper 256 bits - emit_int8(0x01); + emit_int8(value & 0x01); } void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { - assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_avx(), ""); + int vector_len = VM_Version::supports_evex() ? 
AVX_512bit : AVX_256bit; + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - extract from bits 127:0 @@ -5369,13 +5668,11 @@ void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - _tuple_type = EVEX_T4; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); - int vector_len = AVX_512bit; assert(src != xnoreg, "sanity"); - int src_enc = src->encoding(); - vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); + InstructionMark im(this); + InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); // 0x00 - extract from bits 127:0 @@ -5387,11 +5684,8 @@ void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { assert(VM_Version::supports_evex(), ""); - int vector_len = AVX_512bit; - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, - /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); // 0x01 - extract from bits 255:128 @@ -5402,10 
+5696,9 @@ // duplicate 4-bytes integer data from src into 8 locations in dest void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_avx2(), ""); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x58); emit_int8((unsigned char)(0xC0 | encode)); } @@ -5413,189 +5706,170 @@ // duplicate 2-bytes integer data from src into 16 locations in dest void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); - bool vector_len = AVX_256bit; - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F_38, false); + InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); 
emit_int8(0x78); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_8bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x78); emit_operand(dst, src); } // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_16bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + 
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x79); emit_operand(dst, src); } // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x58); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 
&attributes); emit_int8(0x58); emit_operand(dst, src); } // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x59); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); + vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x59); emit_operand(dst, src); } // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), 
VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x18); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) { - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x18); emit_operand(dst, src); } // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); 
emit_int8(0x19); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) { - _instruction_uses_vl = true; - assert(UseAVX > 1, ""); - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_64bit; - InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); - int dst_enc = dst->encoding(); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x19); emit_operand(dst, src); } // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7A); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), 
VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7B); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7C); emit_int8((unsigned char)(0xC0 | encode)); } // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { - _instruction_uses_vl = true; assert(VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, - /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x7C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -5603,8 +5877,8 @@ // Carry-Less Multiplication Quadword void 
Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { assert(VM_Version::supports_clmul(), ""); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, - VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x44); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)mask); @@ -5613,8 +5887,9 @@ // Carry-Less Multiplication Quadword void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) { assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), ""); - int vector_len = AVX_128bit; - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x44); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)mask); @@ -5622,11 +5897,9 @@ void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); - if (UseAVX < 3) - { - (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); - emit_int8(0x77); - } + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x77); } @@ -6130,8 +6403,7 @@ if (pre > 0) { emit_int8(simd_pre[pre]); } - int encode = (rex_w) ? 
prefixq_and_encode(dst_enc, src_enc) : - prefix_and_encode(dst_enc, src_enc); + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc); if (opc > 0) { emit_int8(0x0F); int opc2 = simd_opc[opc]; @@ -6143,7 +6415,9 @@ } -void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) { +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) { + int vector_len = _attributes->get_vector_len(); + bool vex_w = _attributes->is_rex_vex_w(); if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { prefix(VEX_3bytes); @@ -6167,13 +6441,13 @@ } // This is a 4 byte encoding -void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - bool is_extended_context, bool is_merge_context, - int vector_len, bool no_mask_reg ){ +void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){ // EVEX 0x62 prefix prefix(EVEX_4bytes); - _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0); + bool vex_w = _attributes->is_rex_vex_w(); + int evex_encoding = (vex_w ? VEX_W : 0); + // EVEX.b is not currently used for broadcast of single element or data rounding modes + _attributes->set_evex_encoding(evex_encoding); // P0: byte 2, initialized to RXBR`00mm // instead of not'd @@ -6195,214 +6469,127 @@ emit_int8(byte3); // P2: byte 4 as zL'Lbv'aaa - int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) + int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) // EVEX.v` for extending EVEX.vvvv or VIDX byte4 |= (evex_v ? 
0: EVEX_V); // third EXEC.b for broadcast actions - byte4 |= (is_extended_context ? EVEX_Rb : 0); + byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0); // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 - byte4 |= ((vector_len) & 0x3) << 5; + byte4 |= ((_attributes->get_vector_len())& 0x3) << 5; // last is EVEX.z for zero/merge actions - byte4 |= (is_merge_context ? EVEX_Z : 0); + byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0); emit_int8(byte4); } -void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, - VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) { +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0; bool vex_b = adr.base_needs_rex(); bool vex_x = adr.index_needs_rex(); - _avx_vector_len = vector_len; + set_attributes(attributes); + attributes->set_current_assembler(this); // if vector length is turned off, revert to AVX for vectors smaller than 512-bit - if (_legacy_mode_vl && _instruction_uses_vl) { - switch (vector_len) { + if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) { + switch (attributes->get_vector_len()) { case AVX_128bit: case AVX_256bit: - legacy_mode = true; + attributes->set_is_legacy_mode(); break; } } - if ((UseAVX > 2) && (legacy_mode == false)) + if ((UseAVX > 2) && !attributes->is_legacy_mode()) { bool evex_r = (xreg_enc >= 16); bool evex_v = (nds_enc >= 16); - _is_evex_instruction = true; - evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + attributes->set_is_evex_instruction(); + evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc); } else { - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc); } - _instruction_uses_vl = 
false; -} - -int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { +} + +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0; bool vex_b = ((src_enc & 8) == 8) ? 1 : 0; bool vex_x = false; - _avx_vector_len = vector_len; + set_attributes(attributes); + attributes->set_current_assembler(this); // if vector length is turned off, revert to AVX for vectors smaller than 512-bit - if (_legacy_mode_vl && _instruction_uses_vl) { - switch (vector_len) { + if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) { + switch (attributes->get_vector_len()) { case AVX_128bit: case AVX_256bit: - legacy_mode = true; + if ((dst_enc >= 16) | (nds_enc >= 16) | (src_enc >= 16)) { + // up propagate arithmetic instructions to meet RA requirements + attributes->set_vector_len(AVX_512bit); + } else { + attributes->set_is_legacy_mode(); + } break; } } - if ((UseAVX > 2) && (legacy_mode == false)) + if ((UseAVX > 2) && !attributes->is_legacy_mode()) { bool evex_r = (dst_enc >= 16); bool evex_v = (nds_enc >= 16); // can use vex_x as bank extender on rm encoding vex_x = (src_enc >= 16); - evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); + attributes->set_is_evex_instruction(); + evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc); } else { - vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); + vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc); } - _instruction_uses_vl = false; - // return modrm byte components for operands return (((dst_enc & 7) << 3) | (src_enc & 7)); } void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool 
legacy_mode) { + VexOpcode opc, InstructionAttr *attributes) { if (UseAVX > 0) { int xreg_enc = xreg->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes); } else { assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); - rex_prefix(adr, xreg, pre, opc, rex_w); + rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w()); } } int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { + VexOpcode opc, InstructionAttr *attributes) { int dst_enc = dst->encoding(); int src_enc = src->encoding(); if (UseAVX > 0) { int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes); } else { assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); - return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w()); } } int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); + VexOpcode opc, InstructionAttr *attributes) { int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); + return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes); } int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { - int dst_enc = dst->encoding(); - int src_enc = src->encoding(); + VexOpcode opc, InstructionAttr *attributes) { int nds_enc = nds->is_valid() ? nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); -} - -void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - InstructionMark im(this); - simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) { - InstructionMark im(this); - simd_prefix_q(dst, dst, src, pre, no_mask_reg); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -// Versions with no second source register (non-destructive source). 
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { - InstructionMark im(this); - simd_prefix(dst, xnoreg, src, pre, opNoRegMask); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { - InstructionMark im(this); - simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { - int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -// 3-operands AVX instructions -void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { - InstructionMark im(this); - vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) { - InstructionMark im(this); - vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg); - emit_int8(opcode); - emit_operand(dst, src); -} - -void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { - int encode = vex_prefix_and_encode(dst, nds, src, 
pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); -} - -void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len, bool no_mask_reg) { - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); - emit_int8(opcode); - emit_int8((unsigned char)(0xC0 | encode)); + return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes); } void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(!VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC2); emit_int8((unsigned char)(0xC0 | encode)); emit_int8((unsigned char)(0xF & cop)); @@ -6411,7 +6598,9 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(!VM_Version::supports_evex(), ""); - int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; + int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8((unsigned char)0x4B); emit_int8((unsigned char)(0xC0 | encode)); int src2_enc = src2->encoding(); @@ -6430,7 +6619,7 @@ leal(dst, src); } -void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { +void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); emit_int8((unsigned char)0xC7); emit_operand(rax, dst); @@ -6948,15 +7137,17 @@ void Assembler::andnq(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnq(Register dst, Register src1, Address src2) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(dst, src1, src2); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -6983,45 +7174,51 @@ void Assembler::blsiq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ 
true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsiq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rbx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsmskq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rdx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int 
encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::blsrq(Register dst, Address src) { - InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q_legacy(rcx, dst, src); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -7095,45 +7292,44 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* 
input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - if (VM_Version::supports_evex()) { - _tuple_type = EVEX_T1S; - _input_size_in_bits = EVEX_32bit; - } - InstructionMark im(this); - simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x2C); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7316,7 +7512,8 @@ void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x6E); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7324,8 +7521,9 @@ void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix - int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x7E); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7458,8 +7656,8 @@ void Assembler::mulxq(Register dst1, Register dst2, Register src) { assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); - int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, - /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); emit_int8((unsigned char)0xF6); emit_int8((unsigned char)(0xC0 | encode)); } @@ -7621,8 +7819,8 @@ void Assembler::rorxq(Register dst, Register src, int imm8) { assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); - int encode = vex_prefix_and_encode(dst->encoding(), 0, 
src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, - /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes); emit_int8((unsigned char)0xF0); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/assembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -438,6 +438,8 @@ }; +class InstructionAttr; + // 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes // See fxsave and xsave(EVEX enabled) documentation for layout const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize); @@ -568,7 +570,8 @@ EVEX_8bit = 0, EVEX_16bit = 1, EVEX_32bit = 2, - EVEX_64bit = 3 + EVEX_64bit = 3, + EVEX_NObit = 4 }; enum WhichOperand { @@ -598,16 +601,12 @@ private: - int _evex_encoding; - int _input_size_in_bits; - int _avx_vector_len; - int _tuple_type; - bool _is_evex_instruction; bool _legacy_mode_bw; bool _legacy_mode_dq; bool _legacy_mode_vl; bool _legacy_mode_vlbw; - bool _instruction_uses_vl; + + class InstructionAttr *_attributes; // 64bit prefixes int prefix_and_encode(int reg_enc, bool byteinst = false); @@ -637,181 +636,30 @@ int rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w); - void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - int vector_len); - - void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, - int nds_enc, VexSimdPrefix pre, VexOpcode opc, - bool is_extended_context, bool is_merge_context, - int vector_len, 
bool no_mask_reg ); + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc); + + void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, + int nds_enc, VexSimdPrefix pre, VexOpcode opc); void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, - bool legacy_mode = false, bool no_mask_reg = false); - - void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - bool no_mask_reg = false, bool legacy_mode = false) { - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg); - } - - void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - bool no_mask_reg = false) { - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); - } - - void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, no_mask_reg); - } - - void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, true, no_mask_reg); - } - - void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, no_mask_reg); - } - - void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - vex_prefix(src, nds->encoding(), dst->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, - vector_len, true, no_mask_reg); - } + InstructionAttr *attributes); int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, - bool vex_w, int vector_len, - bool legacy_mode, bool no_mask_reg); - - int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - false, no_mask_reg); - } - - int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = false; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), 
src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - true, no_mask_reg); - } - - int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - false, no_mask_reg); - } - - int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { - bool vex_w = true; - int vector_len = AVX_128bit; - return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), - VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, - true, no_mask_reg); - } - - int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, int vector_len = AVX_128bit, - VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false, - bool no_mask_reg = false) { - int src_enc = src->encoding(); - int dst_enc = dst->encoding(); - int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; - return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg); - } - - void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, - VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false); - - void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre, - bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) { - simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc); - } - - void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { - simd_prefix(src, dst, pre, no_mask_reg); - } - void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, - VexSimdPrefix pre, bool no_mask_reg = false) { - bool rex_w = true; - simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w); - } - - int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit, - bool legacy_mode = false); - - int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit); - - int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg, - VexOpcode opc = VEX_OPCODE_0F, - bool rex_w = false, int vector_len = AVX_128bit); - - // Move/convert 32-bit integer value. - int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg) { - // It is OK to cast from Register to XMMRegister to pass argument here - // since only encoding is used in simd_prefix_and_encode() and number of - // Gen and Xmm registers are the same. 
- return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F); - } - int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { - return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg); - } - int simd_prefix_and_encode(Register dst, XMMRegister src, - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, - bool no_mask_reg = false) { - return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc); - } - - // Move/convert 64-bit integer value. - int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, - VexSimdPrefix pre, bool no_mask_reg = false) { - bool rex_w = true; - return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w); - } - int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { - return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg); - } - int simd_prefix_and_encode_q(Register dst, XMMRegister src, - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, - bool no_mask_reg = false) { - bool rex_w = true; - return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w); - } + InstructionAttr *attributes); + + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); + + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); + + int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); + + int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre, + VexOpcode opc, InstructionAttr *attributes); // Helper functions for groups of instructions void emit_arith_b(int op1, int op2, Register dst, int 
imm8); @@ -821,27 +669,6 @@ void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); void emit_arith(int op1, int op2, Register dst, Register src); - void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); - void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); - void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false, bool legacy_mode = false); - void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - Address src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false); - void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, - XMMRegister src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false, bool legacy_mode = false); - void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, - XMMRegister src, VexSimdPrefix pre, int vector_len, - bool no_mask_reg = false); - bool emit_compressed_disp_byte(int &disp); void emit_operand(Register reg, @@ -986,18 +813,16 @@ // belong in macro assembler but there is no need for both varieties to 
exist void init_attributes(void) { - _evex_encoding = 0; - _input_size_in_bits = 0; - _avx_vector_len = AVX_NoVec; - _tuple_type = EVEX_ETUP; - _is_evex_instruction = false; _legacy_mode_bw = (VM_Version::supports_avx512bw() == false); _legacy_mode_dq = (VM_Version::supports_avx512dq() == false); _legacy_mode_vl = (VM_Version::supports_avx512vl() == false); _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false); - _instruction_uses_vl = false; + _attributes = NULL; } + void set_attributes(InstructionAttr *attributes) { _attributes = attributes; } + void clear_attributes(void) { _attributes = NULL; } + void lea(Register dst, Address src); void mov(Register dst, Register src); @@ -1506,13 +1331,18 @@ void movddup(XMMRegister dst, XMMRegister src); + void kmovwl(KRegister dst, Register src); + void kmovdl(KRegister dst, Register src); void kmovql(KRegister dst, KRegister src); void kmovql(KRegister dst, Register src); - void kmovdl(KRegister dst, Register src); - void kmovwl(KRegister dst, Register src); void kmovql(Address dst, KRegister src); void kmovql(KRegister dst, Address src); + void kortestbl(KRegister dst, KRegister src); + void kortestwl(KRegister dst, KRegister src); + void kortestdl(KRegister dst, KRegister src); + void kortestql(KRegister dst, KRegister src); + void movdl(XMMRegister dst, Register src); void movdl(Register dst, XMMRegister src); void movdl(XMMRegister dst, Address src); @@ -1537,6 +1367,12 @@ void vmovdqu(XMMRegister dst, XMMRegister src); // Move Unaligned 512bit Vector + void evmovdqub(Address dst, XMMRegister src, int vector_len); + void evmovdqub(XMMRegister dst, Address src, int vector_len); + void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len); + void evmovdquw(Address dst, XMMRegister src, int vector_len); + void evmovdquw(XMMRegister dst, Address src, int vector_len); + void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len); void evmovdqul(Address dst, XMMRegister src, int vector_len); void 
evmovdqul(XMMRegister dst, Address src, int vector_len); void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len); @@ -1682,8 +1518,22 @@ void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8); + void pcmpeqb(XMMRegister dst, XMMRegister src); + void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); + void pcmpeqw(XMMRegister dst, XMMRegister src); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); + + void pcmpeqd(XMMRegister dst, XMMRegister src); + void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); + + void pcmpeqq(XMMRegister dst, XMMRegister src); + void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len); void pmovmskb(Register dst, XMMRegister src); void vpmovmskb(Register dst, XMMRegister src); @@ -1704,7 +1554,7 @@ void pmovzxbw(XMMRegister dst, XMMRegister src); void pmovzxbw(XMMRegister dst, Address src); - void vpmovzxbw(XMMRegister dst, Address src); + void vpmovzxbw(XMMRegister dst, Address src, int vector_len); #ifndef _LP64 // no 32bit push/pop on amd64 void popl(Address dst); @@ -2106,12 +1956,12 @@ void vextracti128h(Address dst, XMMRegister src); // Copy low 256bit into high 256bit of ZMM registers. 
- void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vextracti64x4h(XMMRegister dst, XMMRegister src); - void vextractf64x4h(XMMRegister dst, XMMRegister src); - void vextractf64x4h(Address dst, XMMRegister src); - void vinsertf64x4h(XMMRegister dst, Address src); + void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); + void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value); + void vextracti64x4h(XMMRegister dst, XMMRegister src, int value); + void vextractf64x4h(XMMRegister dst, XMMRegister src, int value); + void vextractf64x4h(Address dst, XMMRegister src, int value); + void vinsertf64x4h(XMMRegister dst, Address src, int value); // Copy targeted 128bit segments of the ZMM registers void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); @@ -2173,4 +2023,95 @@ }; +// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions. +// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction +// are applied. 
+class InstructionAttr { +public: + InstructionAttr( + int vector_len, + bool rex_vex_w, + bool legacy_mode, + bool no_reg_mask, + bool uses_vl) + : + _avx_vector_len(vector_len), + _rex_vex_w(rex_vex_w), + _legacy_mode(legacy_mode), + _no_reg_mask(no_reg_mask), + _uses_vl(uses_vl), + _tuple_type(Assembler::EVEX_ETUP), + _input_size_in_bits(Assembler::EVEX_NObit), + _is_evex_instruction(false), + _evex_encoding(0), + _is_clear_context(false), + _is_extended_context(false), + _current_assembler(NULL) { + if (UseAVX < 3) _legacy_mode = true; + } + + ~InstructionAttr() { + if (_current_assembler != NULL) { + _current_assembler->clear_attributes(); + } + _current_assembler = NULL; + } + +private: + int _avx_vector_len; + bool _rex_vex_w; + bool _legacy_mode; + bool _no_reg_mask; + bool _uses_vl; + int _tuple_type; + int _input_size_in_bits; + bool _is_evex_instruction; + int _evex_encoding; + bool _is_clear_context; + bool _is_extended_context; + + Assembler *_current_assembler; + +public: + // query functions for field accessors + int get_vector_len(void) const { return _avx_vector_len; } + bool is_rex_vex_w(void) const { return _rex_vex_w; } + bool is_legacy_mode(void) const { return _legacy_mode; } + bool is_no_reg_mask(void) const { return _no_reg_mask; } + bool uses_vl(void) const { return _uses_vl; } + int get_tuple_type(void) const { return _tuple_type; } + int get_input_size(void) const { return _input_size_in_bits; } + int is_evex_instruction(void) const { return _is_evex_instruction; } + int get_evex_encoding(void) const { return _evex_encoding; } + bool is_clear_context(void) const { return _is_clear_context; } + bool is_extended_context(void) const { return _is_extended_context; } + + // Set the vector len manually + void set_vector_len(int vector_len) { _avx_vector_len = vector_len; } + + // Set the instruction to be encoded in AVX mode + void set_is_legacy_mode(void) { _legacy_mode = true; } + + // Set the current instruction to be encoded as an EVEX
instruction + void set_is_evex_instruction(void) { _is_evex_instruction = true; } + + // Internal encoding data used in compressed immediate offset programming + void set_evex_encoding(int value) { _evex_encoding = value; } + + // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components + void set_is_clear_context(void) { _is_clear_context = true; } + + // Map back to the current assembler so that we can manage object-level association + void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; } + + // Address modifiers used for compressed displacement calculation + void set_address_attributes(int tuple_type, int input_size_in_bits) { + if (VM_Version::supports_evex()) { + _tuple_type = tuple_type; + _input_size_in_bits = input_size_in_bits; + } + } + +}; + #endif // CPU_X86_VM_ASSEMBLER_X86_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -81,7 +81,8 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - ce->store_parameter(_method->as_register(), 1); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + ce->store_parameter(m, 1); ce->store_parameter(_bci, 0); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); ce->add_call_info_here(_info); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -2971,6 +2971,14 @@ } +void LIR_Assembler::store_parameter(Metadata* m, int offset_from_rsp_in_words) { + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + 
assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ mov_metadata(Address(rsp, offset_from_rsp_in_bytes), m); +} + + // This code replaces a call to arraycopy; no exception may // be thrown in this code, they must be thrown in the System.arraycopy // activation frame; we could save some checks if this would not be the case @@ -3711,7 +3719,7 @@ if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) { __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg()); } - if (UseAVX > 1) { + if (UseAVX > 0) { __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(), ExternalAddress((address)float_signflip_pool)); } else { @@ -3722,7 +3730,7 @@ if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) { __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg()); } - if (UseAVX > 1) { + if (UseAVX > 0) { __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(), ExternalAddress((address)double_signflip_pool)); } else { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -49,9 +49,10 @@ Register recv, Label* update_done); public: - void store_parameter(Register r, int offset_from_esp_in_words); - void store_parameter(jint c, int offset_from_esp_in_words); - void store_parameter(jobject c, int offset_from_esp_in_words); + void store_parameter(Register r, int offset_from_esp_in_words); + void store_parameter(jint c, int offset_from_esp_in_words); + void store_parameter(jobject c, int offset_from_esp_in_words); + void store_parameter(Metadata* c, int offset_from_esp_in_words); enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28), exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -80,6 +80,7 @@ LIR_Opr LIRGenerator::divOutOpr() { return FrameMap::rax_opr; } LIR_Opr LIRGenerator::remOutOpr() { return FrameMap::rdx_opr; } LIR_Opr LIRGenerator::shiftCountOpr() { return FrameMap::rcx_opr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::rax_opr; } LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c2_globals_x86.hpp --- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -84,6 +84,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, true); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/c2_init_x86.cpp --- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -58,6 +58,4 @@ OptoReg::invalidate(i); } } - - SuperWordLoopUnrollAnalysis = true; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp --- a/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -36,7 +36,7 @@ #include "code/vmreg.hpp" #include "vmreg_x86.inline.hpp" -jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { if (inst->is_call() || inst->is_jump()) { assert(NativeCall::instruction_size == 
(int)NativeJump::instruction_size, "unexpected size"); return (pc_offset + NativeCall::instruction_size); @@ -53,18 +53,17 @@ return (offset); } else if (inst->is_call_reg()) { // the inlined vtable stub contains a "call register" instruction - assert(method != NULL, "only valid for virtual calls"); + assert(method.not_null(), "only valid for virtual calls"); return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset()); } else if (inst->is_cond_jump()) { address pc = (address) (inst); return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc); } else { - fatal("unsupported type of instruction for call site"); - return 0; + JVMCI_ERROR_0("unsupported type of instruction for call site"); } } -void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { address pc = _instructions->start() + pc_offset; Handle obj = HotSpotObjectConstantImpl::object(constant); jobject value = JNIHandles::make_local(obj()); @@ -75,7 +74,7 @@ _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand); TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); #else - fatal("compressed oop on 32bit"); + JVMCI_ERROR("compressed oop on 32bit"); #endif } else { address operand = Assembler::locate_operand(pc, Assembler::imm_operand); @@ -85,19 +84,19 @@ } } -void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) { +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { address pc = _instructions->start() + pc_offset; if (HotSpotMetaspaceConstantImpl::compressed(constant)) { #ifdef _LP64 address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand); - *((narrowKlass*) operand) = record_narrow_metadata_reference(constant); + *((narrowKlass*) operand) = record_narrow_metadata_reference(constant, CHECK); 
TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); #else - fatal("compressed Klass* on 32bit"); + JVMCI_ERROR("compressed Klass* on 32bit"); #endif } else { address operand = Assembler::locate_operand(pc, Assembler::imm_operand); - *((Metadata**) operand) = record_metadata_reference(constant); + *((Metadata**) operand) = record_metadata_reference(constant, CHECK); TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); } } @@ -117,7 +116,7 @@ TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset); } -void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { address pc = (address) inst; if (inst->is_call()) { // NOTE: for call without a mov, the offset must fit a 32-bit immediate @@ -139,18 +138,18 @@ *(jint*) disp += ((address) foreign_call_destination) - old_dest; _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand); } else { - fatal("unsupported relocation for foreign call"); + JVMCI_ERROR("unsupported relocation for foreign call"); } TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); } -void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { +void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) { #ifdef ASSERT Method* method = NULL; // we need to check, this might also be an unresolved method if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { - method = getMethodFromHotSpotMethod(hotspot_method); + method = getMethodFromHotSpotMethod(hotspot_method()); } #endif switch (_next_call_type) { @@ -185,6 +184,7 @@ break; } default: + JVMCI_ERROR("invalid _next_call_type value"); break; } } 
@@ -198,7 +198,7 @@ } -void CodeInstaller::pd_relocate_poll(address pc, jint mark) { +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { switch (mark) { case POLL_NEAR: { relocate_poll_near(pc); @@ -222,13 +222,13 @@ _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand); break; default: - fatal("invalid mark value"); + JVMCI_ERROR("invalid mark value: %d", mark); break; } } // convert JVMCI register indices (as used in oop maps) to HotSpot registers -VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { if (jvmci_reg < RegisterImpl::number_of_registers) { return as_Register(jvmci_reg)->as_VMReg(); } else { @@ -236,8 +236,7 @@ if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) { return as_XMMRegister(floatRegisterNumber)->as_VMReg(); } - ShouldNotReachHere(); - return NULL; + JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -3651,12 +3651,71 @@ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } +void MacroAssembler::movdqu(Address dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { + Assembler::vextractf32x4h(dst, src, 0); + } else { + Assembler::movdqu(dst, src); + } +} + +void MacroAssembler::movdqu(XMMRegister dst, Address src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { + Assembler::vinsertf32x4h(dst, src, 0); + } else { + Assembler::movdqu(dst, src); + } +} + +void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { + Assembler::evmovdqul(dst, src, Assembler::AVX_512bit); + } else { + Assembler::movdqu(dst, src); + } +} + 
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - Assembler::movdqu(dst, as_Address(src)); + movdqu(dst, as_Address(src)); } else { lea(rscratch1, src); - Assembler::movdqu(dst, Address(rscratch1, 0)); + movdqu(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) { + Assembler::vextractf64x4h(dst, src, 0); + } else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) { + Assembler::vinsertf64x4h(dst, src, 0); + } else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { + Assembler::evmovdqul(dst, src, Assembler::AVX_512bit); + } + else { + Assembler::vmovdqu(dst, src); + } +} + +void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + vmovdqu(dst, as_Address(src)); + } + else { + lea(rscratch1, src); + vmovdqu(dst, Address(rscratch1, 0)); } } @@ -3726,6 +3785,10 @@ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); } +#ifdef _LP64 +#define XSTATE_BV 0x200 +#endif + void MacroAssembler::pop_CPU_state() { pop_FPU_state(); pop_IU_state(); @@ -3735,27 +3798,7 @@ #ifndef _LP64 frstor(Address(rsp, 0)); #else - // AVX will continue to use the fxsave area. - // EVEX needs to utilize the xsave area, which is under different - // management. - if(VM_Version::supports_evex()) { - // EDX:EAX describe the XSAVE header and - // are obtained while fetching info for XCR0 via cpuid. - // These two registers make up 64-bits in the header for which bits - // 62:10 are currently reserved for future implementations and unused. Bit 63 - // is unused for our implementation as we do not utilize - // compressed XSAVE areas. 
Bits 9..8 are currently ignored as we do not use - // the functionality for PKRU state and MSR tracing. - // Ergo we are primarily concerned with bits 7..0, which define - // which ISA extensions and features are enabled for a given machine and are - // defined in XemXcr0Eax and is used to map the XSAVE area - // for restoring registers as described via XCR0. - movl(rdx,VM_Version::get_xsave_header_upper_segment()); - movl(rax,VM_Version::get_xsave_header_lower_segment()); - xrstor(Address(rsp, 0)); - } else { - fxrstor(Address(rsp, 0)); - } + fxrstor(Address(rsp, 0)); #endif addptr(rsp, FPUStateSizeInWords * wordSize); } @@ -3773,49 +3816,13 @@ push_FPU_state(); } -#ifdef _LP64 -#define XSTATE_BV 0x200 -#endif - void MacroAssembler::push_FPU_state() { subptr(rsp, FPUStateSizeInWords * wordSize); #ifndef _LP64 fnsave(Address(rsp, 0)); fwait(); #else - // AVX will continue to use the fxsave area. - // EVEX needs to utilize the xsave area, which is under different - // management. - if(VM_Version::supports_evex()) { - // Save a copy of EAX and EDX - push(rax); - push(rdx); - // EDX:EAX describe the XSAVE header and - // are obtained while fetching info for XCR0 via cpuid. - // These two registers make up 64-bits in the header for which bits - // 62:10 are currently reserved for future implementations and unused. Bit 63 - // is unused for our implementation as we do not utilize - // compressed XSAVE areas. Bits 9..8 are currently ignored as we do not use - // the functionality for PKRU state and MSR tracing. - // Ergo we are primarily concerned with bits 7..0, which define - // which ISA extensions and features are enabled for a given machine and are - // defined in XemXcr0Eax and is used to program XSAVE area - // for saving the required registers as defined in XCR0. 
- int xcr0_edx = VM_Version::get_xsave_header_upper_segment(); - int xcr0_eax = VM_Version::get_xsave_header_lower_segment(); - movl(rdx,xcr0_edx); - movl(rax,xcr0_eax); - xsave(Address(rsp, wordSize*2)); - // now Apply control bits and clear bytes 8..23 in the header - pop(rdx); - pop(rax); - movl(Address(rsp, XSTATE_BV), xcr0_eax); - movl(Address(rsp, XSTATE_BV+4), xcr0_edx); - andq(Address(rsp, XSTATE_BV+8), 0); - andq(Address(rsp, XSTATE_BV+16), 0); - } else { - fxsave(Address(rsp, 0)); - } + fxsave(Address(rsp, 0)); #endif // LP64 } @@ -3942,6 +3949,236 @@ testl(dst, as_Address(src)); } +void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::pcmpeqb(dst, src); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::pcmpeqb(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pcmpeqb(xmm0, src); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pcmpeqb(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::pcmpeqb(xmm1, xmm0); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + 
int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::pcmpeqw(dst, src); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::pcmpeqw(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pcmpeqw(xmm0, src); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pcmpeqw(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::pcmpeqw(xmm1, xmm0); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) { + int dst_enc = dst->encoding(); + if (dst_enc < 16) { + Assembler::pcmpestri(dst, src, imm8); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pcmpestri(xmm0, src, imm8); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::pcmpestri(dst, src, imm8); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + 
evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pcmpestri(xmm0, src, imm8); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pcmpestri(dst, xmm0, imm8); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::pcmpestri(xmm1, xmm0, imm8); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::pmovzxbw(dst, src); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::pmovzxbw(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pmovzxbw(xmm0, src); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pmovzxbw(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + 
Assembler::pmovzxbw(xmm1, xmm0); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) { + int dst_enc = dst->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::pmovzxbw(dst, src); + } else if (dst_enc < 16) { + Assembler::pmovzxbw(dst, src); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pmovzxbw(xmm0, src); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::pmovmskb(Register dst, XMMRegister src) { + int src_enc = src->encoding(); + if (src_enc < 16) { + Assembler::pmovmskb(dst, src); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pmovmskb(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::ptest(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::ptest(xmm0, src); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::ptest(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + 
subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::ptest(xmm1, xmm0); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { Assembler::sqrtsd(dst, as_Address(src)); @@ -4007,6 +4244,23 @@ } } +void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { + Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); + } + else { + Assembler::xorpd(dst, src); + } +} + +void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { + if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { + Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); + } else { + Assembler::xorps(dst, src); + } +} + void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { // Used in sign-bit flipping with aligned address. 
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); @@ -4050,6 +4304,864 @@ } } +void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (nds_enc < 16)) { + vandps(dst, nds, negate_field, vector_len); + } else if ((src_enc < 16) && (dst_enc < 16)) { + movss(src, nds); + vandps(dst, src, negate_field, vector_len); + } else if (src_enc < 16) { + movss(src, nds); + vandps(src, src, negate_field, vector_len); + movss(dst, src); + } else if (dst_enc < 16) { + movdqu(src, xmm0); + movss(xmm0, nds); + vandps(dst, xmm0, negate_field, vector_len); + movdqu(xmm0, src); + } else if (nds_enc < 16) { + movdqu(src, xmm0); + vandps(xmm0, nds, negate_field, vector_len); + movss(dst, xmm0); + movdqu(xmm0, src); + } else { + movdqu(src, xmm0); + movss(xmm0, nds); + vandps(xmm0, xmm0, negate_field, vector_len); + movss(dst, xmm0); + movdqu(xmm0, src); + } +} + +void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (nds_enc < 16)) { + vandpd(dst, nds, negate_field, vector_len); + } else if ((src_enc < 16) && (dst_enc < 16)) { + movsd(src, nds); + vandpd(dst, src, negate_field, vector_len); + } else if (src_enc < 16) { + movsd(src, nds); + vandpd(src, src, negate_field, vector_len); + movsd(dst, src); + } else if (dst_enc < 16) { + movdqu(src, xmm0); + movsd(xmm0, nds); + vandpd(dst, xmm0, negate_field, vector_len); + movdqu(xmm0, src); + } else if (nds_enc < 16) { + movdqu(src, xmm0); + vandpd(xmm0, nds, negate_field, vector_len); + movsd(dst, xmm0); + movdqu(xmm0, src); + } else { + movdqu(src, xmm0); + movsd(xmm0, nds); + vandpd(xmm0, xmm0, 
negate_field, vector_len); + movsd(dst, xmm0); + movdqu(xmm0, src); + } +} + +void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddb(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpaddb(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpaddb(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpaddb(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddb(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + 
Assembler::vpaddb(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddb(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpaddw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpaddw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpaddw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + 
evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpaddw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpaddw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpaddw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpaddw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpbroadcastw(dst, src); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpbroadcastw(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpbroadcastw(xmm0, src); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpbroadcastw(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + 
evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::vpbroadcastw(xmm1, xmm0); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + assert(dst_enc == nds_enc, ""); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpcmpeqb(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpcmpeqb(dst, nds, src, vector_len); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpcmpeqb(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = 
src->encoding(); + assert(dst_enc == nds_enc, ""); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpcmpeqw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpcmpeqw(dst, nds, src, vector_len); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpcmpeqw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len); + movdqu(dst, xmm1); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { + int dst_enc = dst->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpmovzxbw(dst, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpmovzxbw(dst, src, vector_len); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpmovzxbw(xmm0, src, vector_len); + movdqu(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) { + int src_enc = 
src->encoding(); + if (src_enc < 16) { + Assembler::vpmovmskb(dst, src); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpmovmskb(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpmullw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpmullw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpmullw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpmullw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpmullw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, 
Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpmullw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpmullw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpmullw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpmullw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubb(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpsubb(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpsubb(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpsubb(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, 
Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubb(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsubb(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubb(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int src_enc = src->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubw(dst, nds, src, vector_len); + } else if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vpsubw(dst, dst, src, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for src + evmovdqul(nds, src, Assembler::AVX_512bit); + Assembler::vpsubw(dst, dst, nds, vector_len); + } else if ((src_enc < 16) && (nds_enc < 16)) { + // use nds as scratch for dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubw(nds, nds, src, 
vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds as scatch for xmm0 to hold src + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vpsubw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsubw(dst, nds, src, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsubw(dst, dst, src, vector_len); + } else if (nds_enc < 16) { + // implies dst_enc in upper bank with src as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsubw(nds, nds, src, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs in upper bank + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsubw(xmm0, xmm0, src, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsraw(dst, nds, shift, 
vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsraw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsraw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc < 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsraw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsraw(dst, nds, shift, vector_len); + } else 
if (dst_enc < 16) { + Assembler::vpsraw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsraw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsraw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsrlw(dst, nds, shift, vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsrlw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc < 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case 
scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsrlw(dst, nds, shift, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsrlw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsrlw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + int shift_enc = shift->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsllw(dst, nds, shift, vector_len); + } else if ((dst_enc < 16) && (shift_enc < 16)) { + Assembler::vpsllw(dst, dst, shift, vector_len); + } else if ((dst_enc < 16) && (nds_enc < 16)) { + // use nds_enc as scratch with shift + evmovdqul(nds, shift, Assembler::AVX_512bit); + Assembler::vpsllw(dst, dst, nds, vector_len); + } else if ((shift_enc < 16) && (nds_enc 
< 16)) { + // use nds as scratch with dst + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else if (dst_enc < 16) { + // use nds to save a copy of xmm0 and hold shift + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsllw(dst, dst, xmm0, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } else if (nds_enc < 16) { + // use nds as dest as temps + evmovdqul(nds, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, shift, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, xmm0, vector_len); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // worse case scenario, all regs are in the upper bank + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(nds, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, shift, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len); + evmovdqul(xmm1, dst, Assembler::AVX_512bit); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + int dst_enc = dst->encoding(); + int nds_enc = nds->encoding(); + if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) { + Assembler::vpsllw(dst, nds, shift, vector_len); + } else if (dst_enc < 16) { + Assembler::vpsllw(dst, dst, shift, vector_len); + } else if (nds_enc < 16) { + // use nds as scratch + evmovdqul(nds, dst, Assembler::AVX_512bit); + Assembler::vpsllw(nds, nds, shift, vector_len); + evmovdqul(dst, nds, Assembler::AVX_512bit); + } else { + // use nds as scratch for xmm0 + evmovdqul(nds, xmm0, 
Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vpsllw(xmm0, xmm0, shift, vector_len); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + } +} + +void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if ((dst_enc < 16) && (src_enc < 16)) { + Assembler::vptest(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::vptest(xmm0, src); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::vptest(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + movdqu(xmm0, src); + movdqu(xmm1, dst); + Assembler::vptest(xmm1, xmm0); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } +} + +// This instruction exists within macros, ergo we cannot control its input +// when emitted through those patterns. 
+void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) { + if (VM_Version::supports_avx512nobw()) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (dst_enc == src_enc) { + if (dst_enc < 16) { + Assembler::punpcklbw(dst, src); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, xmm0); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } else { + if ((src_enc < 16) && (dst_enc < 16)) { + Assembler::punpcklbw(dst, src); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, src); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::punpcklbw(dst, xmm0); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + Assembler::punpcklbw(xmm0, xmm1); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } + } else { + Assembler::punpcklbw(dst, src); + } +} + +// This instruction exists within macros, ergo we cannot control its input +// when emitted through those patterns. 
+void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { + if (VM_Version::supports_avx512nobw()) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (dst_enc == src_enc) { + if (dst_enc < 16) { + Assembler::pshuflw(dst, src, mode); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, xmm0, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } else { + if ((src_enc < 16) && (dst_enc < 16)) { + Assembler::pshuflw(dst, src, mode); + } else if (src_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, src, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else if (dst_enc < 16) { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + Assembler::pshuflw(dst, xmm0, mode); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit); + evmovdqul(xmm0, dst, Assembler::AVX_512bit); + evmovdqul(xmm1, src, Assembler::AVX_512bit); + Assembler::pshuflw(xmm0, xmm1, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } + } else { + Assembler::pshuflw(dst, src, mode); + } +} + void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { if (reachable(src)) { vandpd(dst, nds, as_Address(src), vector_len); @@ 
-4133,31 +5245,16 @@ subptr(rsp, 64); evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); movflt(xmm0, nds); - if (reachable(src)) { - vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorps(xmm0, xmm0, src, Assembler::AVX_128bit); movflt(dst, xmm0); evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); addptr(rsp, 64); } else { movflt(dst, nds); - if (reachable(src)) { - vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit); - } - } - } else { - if (reachable(src)) { - vxorps(dst, nds, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorps(dst, dst, src, Assembler::AVX_128bit); + } + } else { + vxorps(dst, nds, src, Assembler::AVX_128bit); } } @@ -4172,31 +5269,16 @@ subptr(rsp, 64); evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); movdbl(xmm0, nds); - if (reachable(src)) { - vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit); movdbl(dst, xmm0); evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); addptr(rsp, 64); } else { movdbl(dst, nds); - if (reachable(src)) { - vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit); - } - } - } else { - if (reachable(src)) { - vxorpd(dst, nds, as_Address(src), Assembler::AVX_128bit); - } else { - lea(rscratch1, src); - vxorpd(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit); - } + vxorpd(dst, dst, src, Assembler::AVX_128bit); + } + } else { + vxorpd(dst, nds, src, Assembler::AVX_128bit); } } @@ -4688,7 +5770,6 @@ pusha(); 
// if we are coming from c1, xmm registers may be live - int off = 0; int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8); if (UseAVX > 2) { num_xmm_regs = LP64_ONLY(32) NOT_LP64(8); @@ -4697,7 +5778,7 @@ if (UseSSE == 1) { subptr(rsp, sizeof(jdouble)*8); for (int n = 0; n < 8; n++) { - movflt(Address(rsp, off++*sizeof(jdouble)), as_XMMRegister(n)); + movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n)); } } else if (UseSSE >= 2) { if (UseAVX > 2) { @@ -4709,37 +5790,35 @@ #ifdef COMPILER2 if (MaxVectorSize > 16) { if(UseAVX > 2) { - // Save upper half of ZMM registes + // Save upper half of ZMM registers subptr(rsp, 32*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n)); + vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); } - off = 0; } assert(UseAVX > 0, "256 bit vectors are supported only with AVX"); - // Save upper half of YMM registes + // Save upper half of YMM registers subptr(rsp, 16*num_xmm_regs); for (int n = 0; n < num_xmm_regs; n++) { - vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); } } #endif // Save whole 128bit (16 bytes) XMM registers subptr(rsp, 16*num_xmm_regs); - off = 0; #ifdef _LP64 - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int n = 0; n < num_xmm_regs; n++) { - vextractf32x4h(Address(rsp, off++*16), as_XMMRegister(n), 0); + vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0); } } else { for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, off++*16), as_XMMRegister(n)); + movdqu(Address(rsp, n*16), as_XMMRegister(n)); } } #else for (int n = 0; n < num_xmm_regs; n++) { - movdqu(Address(rsp, off++*16), as_XMMRegister(n)); + movdqu(Address(rsp, n*16), as_XMMRegister(n)); } #endif } @@ -4808,44 +5887,40 @@ addptr(rsp, sizeof(jdouble)*nb_args); } - off = 0; if (UseSSE == 1) { for (int n = 0; n < 8; n++) { - movflt(as_XMMRegister(n), Address(rsp, 
off++*sizeof(jdouble))); + movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble))); } addptr(rsp, sizeof(jdouble)*8); } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM regiters + // Restore whole 128bit (16 bytes) XMM registers #ifdef _LP64 - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - vinsertf32x4h(as_XMMRegister(n), Address(rsp, off++*16), 0); - } - } - else { - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, off++*16)); - } - } + if (VM_Version::supports_evex()) { + for (int n = 0; n < num_xmm_regs; n++) { + vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0); + } + } else { + for (int n = 0; n < num_xmm_regs; n++) { + movdqu(as_XMMRegister(n), Address(rsp, n*16)); + } + } #else - for (int n = 0; n < num_xmm_regs; n++) { - movdqu(as_XMMRegister(n), Address(rsp, off++ * 16)); - } + for (int n = 0; n < num_xmm_regs; n++) { + movdqu(as_XMMRegister(n), Address(rsp, n*16)); + } #endif addptr(rsp, 16*num_xmm_regs); #ifdef COMPILER2 if (MaxVectorSize > 16) { - // Restore upper half of YMM registes. - off = 0; + // Restore upper half of YMM registers. 
for (int n = 0; n < num_xmm_regs; n++) { - vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); + vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); } addptr(rsp, 16*num_xmm_regs); if(UseAVX > 2) { - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32)); + vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); } addptr(rsp, 32*num_xmm_regs); } @@ -6312,7 +7387,8 @@ XMMRegister vec, Register tmp, int ae) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); // This method uses the pcmpestri instruction with bound registers @@ -6490,7 +7566,8 @@ XMMRegister vec, Register tmp, int ae) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); // @@ -6807,7 +7884,8 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); int stride = 8; @@ -6831,7 +7909,7 @@ bind(SCAN_TO_16_CHAR_LOOP); vmovdqu(vec3, Address(result, 0)); - vpcmpeqw(vec3, vec3, vec1, true); + vpcmpeqw(vec3, vec3, vec1, 1); vptest(vec2, vec3); jcc(Assembler::carryClear, FOUND_CHAR); addptr(result, 32); @@ -6844,36 +7922,32 @@ pshufd(vec1, vec1, 0); pxor(vec2, vec2); } - if (UseAVX >= 2 || UseSSE42Intrinsics) { - bind(SCAN_TO_8_CHAR); - 
cmpl(cnt1, stride); - if (UseAVX >= 2) { - jccb(Assembler::less, SCAN_TO_CHAR); - } - if (!(UseAVX >= 2)) { - jccb(Assembler::less, SCAN_TO_CHAR_LOOP); - movdl(vec1, ch); - pshuflw(vec1, vec1, 0x00); - pshufd(vec1, vec1, 0); - pxor(vec2, vec2); - } - movl(tmp, cnt1); - andl(tmp, 0xFFFFFFF8); //vector count (in chars) - andl(cnt1,0x00000007); //tail count (in chars) - - bind(SCAN_TO_8_CHAR_LOOP); - movdqu(vec3, Address(result, 0)); - pcmpeqw(vec3, vec1); - ptest(vec2, vec3); - jcc(Assembler::carryClear, FOUND_CHAR); - addptr(result, 16); - subl(tmp, stride); - jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); - } + bind(SCAN_TO_8_CHAR); + cmpl(cnt1, stride); + if (UseAVX >= 2) { + jccb(Assembler::less, SCAN_TO_CHAR); + } else { + jccb(Assembler::less, SCAN_TO_CHAR_LOOP); + movdl(vec1, ch); + pshuflw(vec1, vec1, 0x00); + pshufd(vec1, vec1, 0); + pxor(vec2, vec2); + } + movl(tmp, cnt1); + andl(tmp, 0xFFFFFFF8); //vector count (in chars) + andl(cnt1,0x00000007); //tail count (in chars) + + bind(SCAN_TO_8_CHAR_LOOP); + movdqu(vec3, Address(result, 0)); + pcmpeqw(vec3, vec1); + ptest(vec2, vec3); + jcc(Assembler::carryClear, FOUND_CHAR); + addptr(result, 16); + subl(tmp, stride); + jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); bind(SCAN_TO_CHAR); testl(cnt1, cnt1); jcc(Assembler::zero, RET_NOT_FOUND); - bind(SCAN_TO_CHAR_LOOP); load_unsigned_short(tmp, Address(result, 0)); cmpl(ch, tmp); @@ -6887,16 +7961,14 @@ movl(result, -1); jmpb(DONE_LABEL); - if (UseAVX >= 2 || UseSSE42Intrinsics) { - bind(FOUND_CHAR); - if (UseAVX >= 2) { - vpmovmskb(tmp, vec3); - } else { - pmovmskb(tmp, vec3); - } - bsfl(ch, tmp); - addl(result, ch); - } + bind(FOUND_CHAR); + if (UseAVX >= 2) { + vpmovmskb(tmp, vec3); + } else { + pmovmskb(tmp, vec3); + } + bsfl(ch, tmp); + addl(result, ch); bind(FOUND_SEQ_CHAR); subptr(result, str1); @@ -6985,6 +8057,7 @@ } if (UseAVX >= 2 && UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label 
COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; Label COMPARE_TAIL_LONG; @@ -7059,7 +8132,7 @@ vmovdqu(vec1, Address(str1, result, scale)); vpxor(vec1, Address(str2, result, scale)); } else { - vpmovzxbw(vec1, Address(str1, result, scale1)); + vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit); vpxor(vec1, Address(str2, result, scale2)); } vptest(vec1, vec1); @@ -7120,6 +8193,7 @@ bind(COMPARE_SMALL_STR); } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; int pcmpmask = 0x19; // Setup to compare 8-char (16-byte) vectors, @@ -7252,7 +8326,7 @@ movl(result, len); // copy - if (UseAVX >= 2) { + if (UseAVX >= 2 && UseSSE >= 2) { // With AVX2, use 32-byte vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -7287,6 +8361,7 @@ movl(len, result); // Fallthru to tail compare } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be for SSE4.2 intrinsics to be available"); // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -7363,7 +8438,7 @@ // That's it bind(DONE); - if (UseAVX >= 2) { + if (UseAVX >= 2 && UseSSE >= 2) { // clean upper bits of YMM registers vpxor(vec1, vec1); vpxor(vec2, vec2); @@ -7451,6 +8526,7 @@ movl(limit, result); // Fallthru to tail compare } else if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -7672,7 +8748,7 @@ BIND(L_check_fill_32_bytes); addl(count, 8 << shift); jccb(Assembler::less, L_check_fill_8_bytes); - evmovdqul(Address(to, 0), xtmp, Assembler::AVX_256bit); + vmovdqu(Address(to, 0), xtmp); addptr(to, 32); subl(count, 8 << shift); @@ -7800,6 +8876,7 @@ negptr(len); if (UseSSE42Intrinsics 
|| UseAVX >= 2) { + assert(UseSSE42Intrinsics ? UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit; Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit; @@ -9572,6 +10649,7 @@ push(len); if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label copy_32_loop, copy_16, copy_tail; movl(result, len); @@ -9671,6 +10749,7 @@ assert_different_registers(src, dst, len, tmp2); if (UseSSE42Intrinsics) { + assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available"); Label copy_8_loop, copy_bytes, copy_tail; movl(tmp2, len); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -962,10 +962,15 @@ void divss(XMMRegister dst, AddressLiteral src); // Move Unaligned Double Quadword - void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(Address dst, XMMRegister src); + void movdqu(XMMRegister dst, Address src); + void movdqu(XMMRegister dst, XMMRegister src); void movdqu(XMMRegister dst, AddressLiteral src); + // AVX Unaligned forms + void vmovdqu(Address dst, XMMRegister src); + void vmovdqu(XMMRegister dst, Address src); + void vmovdqu(XMMRegister dst, XMMRegister src); + void vmovdqu(XMMRegister dst, AddressLiteral src); // Move Aligned Double Quadword void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } @@ -999,6 +1004,19 @@ Assembler::pclmulqdq(dst, src, 0x11); } + void pcmpeqb(XMMRegister dst, XMMRegister src); + void pcmpeqw(XMMRegister dst, XMMRegister src); + + void pcmpestri(XMMRegister 
dst, Address src, int imm8); + void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); + + void pmovzxbw(XMMRegister dst, XMMRegister src); + void pmovzxbw(XMMRegister dst, Address src); + + void pmovmskb(Register dst, XMMRegister src); + + void ptest(XMMRegister dst, XMMRegister src); + void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, AddressLiteral src); @@ -1024,12 +1042,12 @@ void ucomisd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); } + void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } void xorpd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); } + void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, AddressLiteral src); @@ -1047,6 +1065,49 @@ void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); + void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); + + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpbroadcastw(XMMRegister dst, XMMRegister 
src); + + void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + + void vpmovzxbw(XMMRegister dst, Address src, int vector_len); + void vpmovmskb(Register dst, XMMRegister src); + + void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + + void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + + void vptest(XMMRegister dst, XMMRegister src); + + void punpcklbw(XMMRegister dst, XMMRegister src); + void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } + + void pshuflw(XMMRegister dst, XMMRegister src, int mode); + void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); } + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); diff -r 6efbc7ffd767 -r d25c2fc1e248 
hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -192,31 +192,22 @@ } } else if(UseSSE >= 2) { // Save whole 128bit (16 bytes) XMM regiters - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0); - off += delta; - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } + for (int n = 0; n < num_xmm_regs; n++) { + __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); + off += delta; } } - if (vect_words > 0) { + if (save_vectors) { assert(vect_words*wordSize == 128, ""); __ subptr(rsp, 128); // Save upper half of YMM registes - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n)); } if (UseAVX > 2) { __ subptr(rsp, 256); // Save upper half of ZMM registes - off = 0; for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n)); + __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1); } } } @@ -275,44 +266,39 @@ #else assert(!restore_vectors, "vectors are generated only by C2"); #endif + + if (restore_vectors) { + assert(additional_frame_bytes == 128, ""); + if (UseAVX > 2) { + // Restore upper half of ZMM registers. + for (int n = 0; n < num_xmm_regs; n++) { + __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1); + } + __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes + } + // Restore upper half of YMM registes. 
+ for (int n = 0; n < num_xmm_regs; n++) { + __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16)); + } + __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes + } + int off = xmm0_off; int delta = xmm1_off - off; if (UseSSE == 1) { - assert(additional_frame_bytes == 0, ""); for (int n = 0; n < num_xmm_regs; n++) { __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize)); off += delta; } } else if (UseSSE >= 2) { - if (VM_Version::supports_avx512novl()) { - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0); - off += delta; - } - } else { - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); - off += delta; - } + // additional_frame_bytes only populated for the restore_vector case, else it is 0 + for (int n = 0; n < num_xmm_regs; n++) { + __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); + off += delta; } } - if (restore_vectors) { - if (UseAVX > 2) { - off = 0; - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32)); - } - __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes - } - // Restore upper half of YMM registes. 
- assert(additional_frame_bytes == 128, ""); - off = 0; - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); - } - __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes - } + __ pop_FPU_state(); __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers @@ -2562,7 +2548,8 @@ oop_maps->add_gc_map( __ pc()-start, map); - // Discard arg to fetch_unroll_info + // Discard args to fetch_unroll_info + __ pop(rcx); __ pop(rcx); __ get_thread(rcx); @@ -2575,9 +2562,8 @@ // we are very short of registers Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); - // retrieve the deopt kind from where we left it. - __ pop(rax); - __ movl(unpack_kind, rax); // save the unpack_kind value + // retrieve the deopt kind from the UnrollBlock. + __ movl(rax, unpack_kind); Label noException; __ cmpl(rax, Deoptimization::Unpack_exception); // Was exception pending? @@ -2787,11 +2773,12 @@ enum frame_layout { arg0_off, // thread sp + 0 // Arg location for arg1_off, // unloaded_class_index sp + 1 // calling C + arg2_off, // exec_mode sp + 2 // The frame sender code expects that rbp will be in the "natural" place and // will override any oopMap setting for it. We must therefore force the layout // so that it agrees with the frame sender code. 
- rbp_off, // callee saved register sp + 2 - return_off, // slot for return address sp + 3 + rbp_off, // callee saved register sp + 3 + return_off, // slot for return address sp + 4 framesize }; @@ -2823,6 +2810,7 @@ __ movptr(Address(rsp, arg0_off*wordSize), rdx); // argument already in ECX __ movl(Address(rsp, arg1_off*wordSize),rcx); + __ movl(Address(rsp, arg2_off*wordSize), Deoptimization::Unpack_uncommon_trap); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); // Set an oopmap for the call site @@ -2839,6 +2827,16 @@ // Load UnrollBlock into EDI __ movptr(rdi, rax); +#ifdef ASSERT + { Label L; + __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), + (int32_t)Deoptimization::Unpack_uncommon_trap); + __ jcc(Assembler::equal, L); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + // Pop all the frames we must move/replace. // // Frame picture (youngest to oldest) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -72,45 +72,28 @@ class RegisterSaver { // Capture info about frame layout. Layout offsets are in jint // units because compiler frame slots are jints. 
-#define HALF_ZMM_BANK_WORDS 128 +#define XSAVE_AREA_BEGIN 160 +#define XSAVE_AREA_YMM_BEGIN 576 +#define XSAVE_AREA_ZMM_BEGIN 1152 +#define XSAVE_AREA_UPPERBANK 1664 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off +#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off enum layout { fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area - xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area + xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area DEF_XMM_OFFS(0), DEF_XMM_OFFS(1), - DEF_XMM_OFFS(2), - DEF_XMM_OFFS(3), - DEF_XMM_OFFS(4), - DEF_XMM_OFFS(5), - DEF_XMM_OFFS(6), - DEF_XMM_OFFS(7), - DEF_XMM_OFFS(8), - DEF_XMM_OFFS(9), - DEF_XMM_OFFS(10), - DEF_XMM_OFFS(11), - DEF_XMM_OFFS(12), - DEF_XMM_OFFS(13), - DEF_XMM_OFFS(14), - DEF_XMM_OFFS(15), - zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt), + // 2..15 are implied in range usage + ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, + DEF_YMM_OFFS(0), + DEF_YMM_OFFS(1), + // 2..15 are implied in range usage + zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, + zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, DEF_ZMM_OFFS(16), DEF_ZMM_OFFS(17), - DEF_ZMM_OFFS(18), - DEF_ZMM_OFFS(19), - DEF_ZMM_OFFS(20), - DEF_ZMM_OFFS(21), - DEF_ZMM_OFFS(22), - DEF_ZMM_OFFS(23), - DEF_ZMM_OFFS(24), - DEF_ZMM_OFFS(25), - DEF_ZMM_OFFS(26), - DEF_ZMM_OFFS(27), - DEF_ZMM_OFFS(28), - DEF_ZMM_OFFS(29), - DEF_ZMM_OFFS(30), - DEF_ZMM_OFFS(31), + // 18..31 are implied in range usage fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), fpu_stateH_end, r15_off, r15H_off, @@ -160,8 
+143,6 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { - int vect_words = 0; - int ymmhi_offset = -1; int off = 0; int num_xmm_regs = XMMRegisterImpl::number_of_registers; if (UseAVX < 3) { @@ -171,24 +152,15 @@ if (save_vectors) { assert(UseAVX > 0, "512bit vectors are supported only with EVEX"); assert(MaxVectorSize == 64, "only 512bit vectors are supported now"); - // Save upper half of YMM registers - vect_words = 16 * num_xmm_regs / wordSize; - if (UseAVX < 3) { - ymmhi_offset = additional_frame_words; - additional_frame_words += vect_words; - } } #else assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); #endif - // Always make the frame size 16-byte aligned - int frame_size_in_bytes = round_to(additional_frame_words*wordSize + - reg_save_size*BytesPerInt, num_xmm_regs); + // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated + int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs); // OopMap frame size is in compiler stack slots (jint's) not bytes or words int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; - // The caller will allocate additional_frame_words - int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; // CodeBlob frame size is in words. 
int frame_size_in_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_size_in_words; @@ -203,12 +175,34 @@ __ push_CPU_state(); // Push a multiple of 16 bytes // push cpu state handles this on EVEX enabled targets - if ((vect_words > 0) && (UseAVX < 3)) { - assert(vect_words*wordSize >= 256, ""); - // Save upper half of YMM registes(0..num_xmm_regs) - __ subptr(rsp, num_xmm_regs*16); - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n)); + if (save_vectors) { + // Save upper half of YMM registes(0..15) + int base_addr = XSAVE_AREA_YMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n)); + } + if (VM_Version::supports_evex()) { + // Save upper half of ZMM registes(0..15) + base_addr = XSAVE_AREA_ZMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1); + } + // Save full ZMM registes(16..num_xmm_regs) + base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + int vector_len = Assembler::AVX_512bit; + for (int n = 16; n < num_xmm_regs; n++) { + __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); + } + } + } else { + if (VM_Version::supports_evex()) { + // Save upper bank of ZMM registers(16..31) for double/float usage + int base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n)); + } } } if (frame::arg_reg_save_area_bytes != 0) { @@ -224,8 +218,7 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(frame_size_in_slots, 0); -#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) -#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset) +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); map->set_callee_saved(STACK_OFFSET( 
rcx_off ), rcx->as_VMReg()); @@ -257,31 +250,21 @@ off = zmm16_off; delta = zmm17_off - off; for (int n = 16; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); + XMMRegister zmm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); off += delta; } } #if defined(COMPILER2) || INCLUDE_JVMCI if (save_vectors) { - assert(ymmhi_offset != -1, "save area must exist"); - map->set_callee_saved(YMMHI_STACK_OFFSET( 0), xmm0->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4)); - map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4)); + off = ymm0_off; + int delta = ymm1_off - off; + for (int n = 0; n < 16; n++) { + XMMRegister ymm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); + off += delta; + } } #endif // COMPILER2 || INCLUDE_JVMCI @@ -316,8 +299,8 
@@ off = zmm16H_off; delta = zmm17H_off - off; for (int n = 16; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); + XMMRegister zmm_name = as_XMMRegister(n); + map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); off += delta; } } @@ -335,21 +318,48 @@ // Pop arg register save area __ addptr(rsp, frame::arg_reg_save_area_bytes); } + #if defined(COMPILER2) || INCLUDE_JVMCI - // On EVEX enabled targets everything is handled in pop fpu state - if ((restore_vectors) && (UseAVX < 3)) { - assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX"); - assert(MaxVectorSize == 64, "up to 512bit vectors are supported now"); - int off = 0; - // Restore upper half of YMM registes (0..num_xmm_regs) - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16)); - } - __ addptr(rsp, num_xmm_regs*16); + if (restore_vectors) { + assert(UseAVX > 0, "512bit vectors are supported only with EVEX"); + assert(MaxVectorSize == 64, "only 512bit vectors are supported now"); } #else - assert(!restore_vectors, "vectors are generated only by C2 and JVMCI"); + assert(!restore_vectors, "vectors are generated only by C2"); #endif + + // On EVEX enabled targets everything is handled in pop fpu state + if (restore_vectors) { + // Restore upper half of YMM registers (0..15) + int base_addr = XSAVE_AREA_YMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16)); + } + if (VM_Version::supports_evex()) { + // Restore upper half of ZMM registers (0..15) + base_addr = XSAVE_AREA_ZMM_BEGIN; + for (int n = 0; n < 16; n++) { + __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1); + } + // Restore full ZMM registers(16..num_xmm_regs) + base_addr = XSAVE_AREA_UPPERBANK; + int vector_len = Assembler::AVX_512bit; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __
evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); + } + } + } else { + if (VM_Version::supports_evex()) { + // Restore upper bank of ZMM registes(16..31) for double/float usage + int base_addr = XSAVE_AREA_UPPERBANK; + int off = 0; + for (int n = 16; n < num_xmm_regs; n++) { + __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64))); + } + } + } + // Recover CPU state __ pop_CPU_state(); // Get the rbp described implicitly by the calling convention (no oopMap) @@ -2819,6 +2829,7 @@ __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute); __ mov(c_rarg0, r15_thread); + __ movl(c_rarg2, r14); // exec mode __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); @@ -2905,6 +2916,7 @@ } #endif // ASSERT __ mov(c_rarg0, r15_thread); + __ movl(c_rarg1, r14); // exec_mode __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); // Need to have an oopmap that tells fetch_unroll_info where to @@ -2922,6 +2934,7 @@ // Load UnrollBlock* into rdi __ mov(rdi, rax); + __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); Label noException; __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 
__ jcc(Assembler::notEqual, noException); @@ -3140,6 +3153,7 @@ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); __ mov(c_rarg0, r15_thread); + __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); // Set an oopmap for the call site @@ -3155,6 +3169,16 @@ // Load UnrollBlock* into rdi __ mov(rdi, rax); +#ifdef ASSERT + { Label L; + __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), + (int32_t)Deoptimization::Unpack_uncommon_trap); + __ jcc(Assembler::equal, L); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + // Pop all the frames we must move/replace. // // Frame picture (youngest to oldest) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -273,7 +273,7 @@ if (UseAVX > 2) { last_reg = 31; } - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0); } @@ -391,7 +391,7 @@ // restore regs belonging to calling function #ifdef _WIN64 // emit the restores for xmm regs - if (VM_Version::supports_avx512novl()) { + if (VM_Version::supports_evex()) { for (int i = xmm_save_first; i <= last_reg; i++) { __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0); } @@ -1439,8 +1439,8 @@ // Copy 64-bytes per iteration __ BIND(L_loop); if (UseAVX > 2) { - __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit); - __ evmovdqul(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit); + __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit); + __ 
evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit); } else if (UseAVX == 2) { __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/vm_version_x86.cpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -632,12 +632,36 @@ // Use AES instructions if available. if (supports_aes()) { if (FLAG_IS_DEFAULT(UseAES)) { - UseAES = true; + FLAG_SET_DEFAULT(UseAES, true); } - } else if (UseAES) { - if (!FLAG_IS_DEFAULT(UseAES)) + if (!UseAES) { + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } else { + if (UseSSE > 2) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else { + // The AES intrinsic stubs require AES instruction support (of course) + // but also require sse3 mode or higher for instructions it use. + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + } else if (UseAES || UseAESIntrinsics) { + if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { warning("AES instructions are not available on this CPU"); - FLAG_SET_DEFAULT(UseAES, false); + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } } // Use CLMUL instructions if available. 
@@ -673,18 +697,6 @@ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); } - // The AES intrinsic stubs require AES instruction support (of course) - // but also require sse3 mode for instructions it use. - if (UseAES && (UseSSE > 2)) { - if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { - UseAESIntrinsics = true; - } - } else if (UseAESIntrinsics) { - if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) - warning("AES intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseAESIntrinsics, false); - } - // GHASH/GCM intrinsics if (UseCLMUL && (UseSSE > 2)) { if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { @@ -891,7 +903,7 @@ UseNewLongLShift = true; } if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { + if (supports_sse4a()) { UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron } else { UseXmmLoadAndClearUpper = false; @@ -918,10 +930,15 @@ UseXmmI2D = false; } } - if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { - if( supports_sse4_2() && UseSSE >= 4 ) { - UseSSE42Intrinsics = true; + if (supports_sse4_2() && UseSSE >= 4) { + if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); } + } else { + if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); } // some defaults for AMD family 15h @@ -995,8 +1012,13 @@ } if (supports_sse4_2() && UseSSE >= 4) { if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { - UseSSE42Intrinsics = true; + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); } + } else { + if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + warning("SSE4.2 intrinsics require SSE4.2 instructions or higher.
Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); } } if ((cpu_family() == 0x06) && diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/vm_version_x86.hpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -552,6 +552,19 @@ break; } } + // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen + if (retVal == false) { + // Verify that OS save/restore all bits of EVEX registers + // during signal processing. + int nreg = 2 LP64_ONLY(+2); + retVal = true; + for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register + if (_cpuid_info.zmm_save[i] != ymm_test_value()) { + retVal = false; + break; + } + } + } } return retVal; } @@ -706,6 +719,9 @@ static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; } static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } + static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } + static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } + static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/x86.ad --- a/hotspot/src/cpu/x86/vm/x86.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/x86.ad Tue Nov 24 10:30:23 2015 +0100 @@ -1716,6 +1716,36 @@ return ret_value; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + // identify extra cases that we might want to provide match rules for + // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + if (ret_value) { + switch (opcode) { + case Op_AddVB: + case Op_SubVB: + if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_URShiftVS: + case Op_RShiftVS: + case Op_LShiftVS: + case Op_MulVS: + case Op_AddVS: + case Op_SubVS: + if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_CMoveVD: + if (vlen != 4) + ret_value = false; + break; + } + } + + return ret_value; // Per default match rules are supported. +} + const int Matcher::float_pressure(int default_pressure_threshold) { int float_pressure_threshold = default_pressure_threshold; #ifdef _LP64 @@ -1759,11 +1789,9 @@ break; case T_BYTE: if (size < 4) return 0; - if ((size > 32) && !VM_Version::supports_avx512bw()) return 0; break; case T_SHORT: if (size < 4) return 0; - if ((size > 16) && !VM_Version::supports_avx512bw()) return 0; break; default: ShouldNotReachHere(); @@ -1967,27 +1995,34 @@ bool is_single_byte = false; int vec_len = 0; if ((UseAVX > 2) && (stack_offset != 0)) { + int tuple_type = Assembler::EVEX_FVM; + int input_size = Assembler::EVEX_32bit; switch (ireg) { - case Op_VecS: + case Op_VecS: + tuple_type = Assembler::EVEX_T1S; + break; case Op_VecD: + tuple_type = Assembler::EVEX_T1S; + input_size = Assembler::EVEX_64bit; + break; case Op_VecX: - break; - case Op_VecY: - vec_len = 1; - break; + break; + case Op_VecY: + vec_len = 1; + break; case Op_VecZ: - vec_len = 2; - break; + vec_len = 2; + break; } - is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0); + is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); } int offset_size = 0; int size = 5; if (UseAVX > 2 ) { - if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { + if 
(VM_Version::supports_avx512novl() && (vec_len == 2)) { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); size += 2; // Need an additional two bytes for EVEX encoding - } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { + } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); } else { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); @@ -2711,7 +2746,21 @@ %} instruct absF_reg_reg(regF dst, regF src) %{ - predicate(UseAVX > 0); + predicate(VM_Version::supports_avxonly()); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} + +#ifdef _LP64 +instruct absF_reg_reg_evex(regF dst, regF src) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} @@ -2723,6 +2772,34 @@ ins_pipe(pipe_slow); %} +instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ + predicate(VM_Version::supports_avx512novl()); + match(Set dst (AbsF src1)); + effect(TEMP src2); + ins_cost(150); + format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, + ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#else // _LP64 +instruct absF_reg_reg_evex(regF dst, regF src) %{ + predicate(UseAVX > 2); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandps($dst$$XMMRegister, $src$$XMMRegister, + 
ExternalAddress(float_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#endif + instruct absD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AbsD dst)); @@ -2736,7 +2813,22 @@ %} instruct absD_reg_reg(regD dst, regD src) %{ - predicate(UseAVX > 0); + predicate(VM_Version::supports_avxonly()); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} + +#ifdef _LP64 +instruct absD_reg_reg_evex(regD dst, regD src) %{ + predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" @@ -2749,6 +2841,35 @@ ins_pipe(pipe_slow); %} +instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ + predicate(VM_Version::supports_avx512novl()); + match(Set dst (AbsD src1)); + effect(TEMP src2); + ins_cost(150); + format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#else // _LP64 +instruct absD_reg_reg_evex(regD dst, regD src) %{ + predicate(UseAVX > 2); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + int vector_len = 0; + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask()), vector_len); + %} + ins_pipe(pipe_slow); +%} +#endif + instruct negF_reg(regF dst) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (NegF dst)); @@ -4554,7 +4675,7 @@ %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF 
tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" @@ -4594,37 +4715,37 @@ instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 2 && UseAVX == 0); match(Set dst (AddReductionVI src1 src2)); - effect(TEMP tmp2, TEMP tmp); - format %{ "movdqu $tmp2,$src2\n\t" - "phaddd $tmp2,$tmp2\n\t" - "phaddd $tmp2,$tmp2\n\t" - "movd $tmp,$src1\n\t" - "paddd $tmp,$tmp2\n\t" - "movd $dst,$tmp\t! add reduction4I" %} - ins_encode %{ - __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); - __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); - __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); - __ movdl($tmp$$XMMRegister, $src1$$Register); - __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdl($dst$$Register, $tmp$$XMMRegister); + effect(TEMP tmp, TEMP tmp2); + format %{ "movdqu $tmp,$src2\n\t" + "phaddd $tmp,$tmp\n\t" + "phaddd $tmp,$tmp\n\t" + "movd $tmp2,$src1\n\t" + "paddd $tmp2,$tmp\n\t" + "movd $dst,$tmp2\t! add reduction4I" %} + ins_encode %{ + __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); + __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); + __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); + __ movdl($tmp2$$XMMRegister, $src1$$Register); + __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); + __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" - "vphaddd $tmp,$tmp,$tmp2\n\t" + "vphaddd $tmp,$tmp,$tmp\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! 
add reduction4I" %} ins_encode %{ int vector_len = 0; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); - __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); @@ -4657,7 +4778,7 @@ %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ - predicate(UseAVX > 0 && UseAVX < 3); + predicate(VM_Version::supports_avxonly()); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" @@ -4712,7 +4833,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2\n\t" + format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" @@ -4724,7 +4845,7 @@ "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); @@ -4763,7 +4884,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" + format %{ "vextracti128 $tmp,$src2\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4771,7 +4892,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
add reduction4L" %} ins_encode %{ - __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4786,7 +4907,7 @@ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2\n\t" + format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" @@ -4796,7 +4917,7 @@ "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -4810,290 +4931,280 @@ %} #endif -instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "addss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "addss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! 
add reduction2F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "addss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "addss $dst,$tmp\t! add reduction2F" %} + ins_encode %{ + __ addss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp2, TEMP tmp); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} - ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddss $dst,$dst,$tmp\t! 
add reduction2F" %} + ins_encode %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "addss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "addss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x02\n\t" - "addss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x03\n\t" - "addss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! add reduction4F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); - __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ + match(Set dst (AddReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "addss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "addss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x02\n\t" + "addss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x03\n\t" + "addss $dst,$tmp\t! 
add reduction4F" %} + ins_encode %{ + __ addss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); + __ addss($dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} - ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddss $dst,$dst,$tmp\t!
add reduction4F" %} + ins_encode %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! 
add reduction8F" %} - ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddss $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\t! add reduction8F" %} + ins_encode %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); - match(Set dst (AddReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddss $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" + "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - "vaddss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vaddss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vaddss $dst,$tmp2,$tmp\t! 
add reduction16F" %} - ins_encode %{ - __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vaddss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vaddss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vaddss $dst,$dst,$tmp\t! add reduction16F" %} + ins_encode %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ 
vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, 
$tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (AddReductionVD src1 src2)); + match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst); - format %{ "movdqu $tmp,$src1\n\t" - "addsd $tmp,$src2\n\t" - "pshufd $dst,$src2,0xE\n\t" + format %{ "addsd $dst,$src2\n\t" + "pshufd $tmp,$src2,0xE\n\t" "addsd $dst,$tmp\t! 
add reduction2D" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); + __ addsd($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ +instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %} - ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! 
add reduction4D" %} - ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4h $tmp2,$src2, 0x1\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); - match(Set dst (AddReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vaddsd $tmp2,$src1,$src2\n\t" + match(Set dst (AddReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - 
"vaddsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %} - ins_encode %{ - __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vaddsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ 
vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -5216,7 +5327,7 @@ predicate(UseAVX > 2); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vextracti64x4 $tmp3,$src2\n\t" + format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" "vextracti128 $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$src2\n\t" @@ -5228,7 +5339,7 @@ "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction16I" %} ins_encode %{ - __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); @@ -5267,7 +5378,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" + format %{ "vextracti128 $tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5275,7 +5386,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
mul reduction4L" %} ins_encode %{ - __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5290,7 +5401,7 @@ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); - format %{ "vextracti64x4 $tmp2,$src2\n\t" + format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" "vextracti128 $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" @@ -5300,7 +5411,7 @@ "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction8L" %} ins_encode %{ - __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); @@ -5314,290 +5425,280 @@ %} #endif -instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ +instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "mulss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "mulss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! 
mul reduction2F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ + match(Set dst (MulReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "mulss $dst,$tmp\t! mul reduction2F" %} + ins_encode %{ + __ mulss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} - ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulss $dst,$dst,$tmp\t! 
mul reduction2F" %} + ins_encode %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "movdqu $tmp,$src1\n\t" - "mulss $tmp,$src2\n\t" - "pshufd $tmp2,$src2,0x01\n\t" - "mulss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x02\n\t" - "mulss $tmp,$tmp2\n\t" - "pshufd $tmp2,$src2,0x03\n\t" - "mulss $tmp,$tmp2\n\t" - "movdqu $dst,$tmp\t! mul reduction4F" %} - ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); - __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ + match(Set dst (MulReductionVF dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulss $dst,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "mulss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x02\n\t" + "mulss $dst,$tmp\n\t" + "pshufd $tmp,$src2,0x03\n\t" + "mulss $dst,$tmp\t! 
mul reduction4F" %} + ins_encode %{ + __ mulss($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); + __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} - ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} + ins_encode %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} - ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulss $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} + ins_encode %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2); - match(Set dst (MulReductionVF src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulss $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVF dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" + "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x1\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x2\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "vextractf32x4 $tmp3,$src2, 0x3\n\t" - "vmulss $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0x01\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x02\n\t" - "vmulss $tmp2,$tmp2,$tmp\n\t" - "pshufd $tmp,$tmp3,0x03\n\t" - "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} - ins_encode %{ - __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vmulss $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0x01\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x02\n\t" + "vmulss $dst,$dst,$tmp\n\t" + "pshufd $tmp,$tmp2,0x03\n\t" + "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} + ins_encode %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ 
vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); - __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); - __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, 
$tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP dst); - format %{ "movdqu $tmp,$src1\n\t" - "mulsd $tmp,$src2\n\t" - "pshufd $dst,$src2,0xE\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP dst, TEMP tmp); + format %{ "mulsd $dst,$src2\n\t" + "pshufd $tmp,$src2,0xE\n\t" "mulsd $dst,$tmp\t! 
mul reduction2D" %} ins_encode %{ - __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); - __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); - __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); + __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ +instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %} - ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf128 $tmp3,$src2\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction4D" %} - ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf128 $tmp2,$src2\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2); - match(Set dst (MulReductionVD src1 src2)); - effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - format %{ "vmulsd $tmp2,$src1,$src2\n\t" + match(Set dst (MulReductionVD dst src2)); + effect(TEMP tmp, TEMP dst, TEMP tmp2); + format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x1\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x1\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$src2,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x2\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd 
$tmp,$tmp3,0xE\n\t" - "vmulsd $tmp2,$tmp2,$tmp\n\t" - "vextractf64x2 $tmp3,$src2, 0x3\n\t" - "vmulsd $tmp2,$tmp2,$tmp3\n\t" - "pshufd $tmp,$tmp3,0xE\n\t" - "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %} - ins_encode %{ - __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x2\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\n\t" + "vextractf32x4 $tmp2,$src2, 0x3\n\t" + "vmulsd $dst,$dst,$tmp2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); - __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); - __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); - __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); - __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, 
$src2$$XMMRegister, 0x2); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -5608,7 +5709,7 @@ // Bytes vector add instruct vadd4B(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed4B" %} ins_encode %{ @@ -5617,8 +5718,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ @@ -5628,8 +5740,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! 
add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} ins_encode %{ @@ -5639,8 +5763,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd8B(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed8B" %} ins_encode %{ @@ -5649,8 +5796,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ @@ -5660,8 +5818,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ @@ -5671,8 +5841,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd16B(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseAVX == 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed16B" %} ins_encode %{ @@ -5681,8 +5874,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); +instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ @@ -5692,8 +5896,31 @@ ins_pipe( pipe_slow ); %} -instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); +instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! 
add packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ @@ -5703,8 +5930,31 @@ ins_pipe( pipe_slow ); %} -instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); +instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! 
add packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ @@ -5714,8 +5964,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); +instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB dst src2)); + effect(TEMP src1); + format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ @@ -5725,8 +5987,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (AddVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddb $dst,$src,$mem\t! 
add packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ins_encode %{ @@ -5737,7 +6022,7 @@ %} instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} ins_encode %{ @@ -5749,7 +6034,7 @@ // Shorts/Chars vector add instruct vadd2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! add packed2S" %} ins_encode %{ @@ -5758,8 +6043,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! 
add packed2S" %} ins_encode %{ @@ -5769,8 +6065,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ @@ -5780,8 +6088,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! 
add packed4S" %} ins_encode %{ @@ -5790,8 +6121,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ @@ -5801,8 +6143,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ @@ -5812,8 +6166,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! 
add packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! add packed8S" %} ins_encode %{ @@ -5822,8 +6199,19 @@ ins_pipe( pipe_slow ); %} -instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %} ins_encode %{ @@ -5833,8 +6221,31 @@ ins_pipe( pipe_slow ); %} -instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ @@ -5844,8 +6255,31 @@ ins_pipe( pipe_slow ); %} -instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ @@ -5855,8 +6289,20 @@ ins_pipe( pipe_slow ); %} -instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS dst src2)); + effect(TEMP src1); + format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ins_encode %{ @@ -5866,8 +6312,31 @@ ins_pipe( pipe_slow ); %} +instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (AddVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ins_encode %{ @@ -5878,7 +6347,7 @@ %} instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ins_encode %{ @@ -6264,7 +6733,7 @@ // Bytes vector sub instruct vsub4B(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed4B" %} ins_encode %{ @@ -6273,8 +6742,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ins_encode %{ @@ -6284,8 +6764,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} ins_encode %{ @@ -6295,8 +6787,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! 
sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub8B(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed8B" %} ins_encode %{ @@ -6305,8 +6820,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ins_encode %{ @@ -6316,8 +6842,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ins_encode %{ @@ -6327,8 +6865,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub16B(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 16); + predicate(UseAVX == 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed16B" %} ins_encode %{ @@ -6337,8 +6898,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); +instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ins_encode %{ @@ -6348,8 +6920,31 @@ ins_pipe( pipe_slow ); %} -instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 16); +instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} ins_encode %{ @@ -6359,8 +6954,31 @@ ins_pipe( pipe_slow ); %} -instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); +instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ins_encode %{ @@ -6370,8 +6988,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 32); +instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB dst src2)); + effect(TEMP src1); + format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ins_encode %{ @@ -6381,8 +7011,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); + match(Set dst (SubVB dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} ins_encode %{ @@ -6393,7 +7046,7 @@ %} instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! 
sub packed64B" %} ins_encode %{ @@ -6405,7 +7058,7 @@ // Shorts/Chars vector sub instruct vsub2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! sub packed2S" %} ins_encode %{ @@ -6414,8 +7067,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ins_encode %{ @@ -6425,8 +7089,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! 
sub packed2S" %} ins_encode %{ @@ -6436,8 +7112,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! sub packed4S" %} ins_encode %{ @@ -6446,8 +7145,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} ins_encode %{ @@ -6457,8 +7167,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ @@ -6468,8 +7190,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! 
sub packed8S" %} ins_encode %{ @@ -6478,8 +7223,19 @@ ins_pipe( pipe_slow ); %} -instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ @@ -6489,8 +7245,31 @@ ins_pipe( pipe_slow ); %} -instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ @@ -6500,8 +7279,31 @@ ins_pipe( pipe_slow ); %} -instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed16S" %} ins_encode %{ @@ -6511,8 +7313,20 @@ ins_pipe( pipe_slow ); %} -instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS dst src2)); + effect(TEMP src1); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ @@ -6522,8 +7336,31 @@ ins_pipe( pipe_slow ); %} +instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (SubVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} ins_encode %{ @@ -6534,7 +7371,7 @@ %} instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ins_encode %{ @@ -6920,7 +7757,7 @@ // Shorts/Chars vector mul instruct vmul2S(vecS dst, vecS src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed2S" %} ins_encode %{ @@ -6929,8 +7766,19 @@ ins_pipe( pipe_slow ); %} -instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} ins_encode %{ @@ -6940,8 +7788,20 @@ ins_pipe( pipe_slow ); %} -instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ins_encode %{ @@ -6951,8 +7811,31 @@ ins_pipe( pipe_slow ); %} +instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul4S(vecD dst, vecD src) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! 
mul packed4S" %} ins_encode %{ @@ -6961,8 +7844,19 @@ ins_pipe( pipe_slow ); %} -instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ins_encode %{ @@ -6972,8 +7866,20 @@ ins_pipe( pipe_slow ); %} -instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} ins_encode %{ @@ -6983,8 +7889,31 @@ ins_pipe( pipe_slow ); %} +instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul8S(vecX dst, vecX src) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed8S" %} ins_encode %{ @@ -6993,8 +7922,19 @@ ins_pipe( pipe_slow ); %} -instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} ins_encode %{ @@ -7004,8 +7944,31 @@ ins_pipe( pipe_slow ); %} -instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ @@ -7015,8 +7978,31 @@ ins_pipe( pipe_slow ); %} -instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! 
mul packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ @@ -7026,8 +8012,20 @@ ins_pipe( pipe_slow ); %} -instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS dst src2)); + effect(TEMP src1); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ @@ -7037,8 +8035,31 @@ ins_pipe( pipe_slow ); %} +instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! 
mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (MulVS dst (LoadVector mem))); + effect(TEMP src); + format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} ins_encode %{ @@ -7049,7 +8070,7 @@ %} instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} ins_encode %{ @@ -7711,7 +8732,7 @@ // Shorts/Chars vector left shift instruct vsll2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed2S" %} ins_encode %{ @@ -7721,7 +8742,7 @@ %} instruct vsll2S_imm(vecS dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} ins_encode %{ @@ -7730,8 +8751,19 @@ ins_pipe( pipe_slow ); %} -instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ @@ -7741,8 +8773,20 @@ ins_pipe( pipe_slow ); %} -instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ @@ -7752,8 +8796,31 @@ ins_pipe( pipe_slow ); %} +instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsll4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ @@ -7763,7 +8830,7 @@ %} instruct vsll4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ @@ -7772,8 +8839,19 @@ ins_pipe( pipe_slow ); %} -instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} ins_encode %{ @@ -7783,8 +8861,20 @@ ins_pipe( pipe_slow ); %} -instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ @@ -7794,8 +8884,31 @@ ins_pipe( pipe_slow ); %} +instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsll8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ @@ -7805,7 +8918,7 @@ %} instruct vsll8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ @@ -7814,8 +8927,19 @@ ins_pipe( pipe_slow ); %} -instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ @@ -7825,8 +8949,31 @@ ins_pipe( pipe_slow ); %} -instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ @@ -7836,8 +8983,31 @@ ins_pipe( pipe_slow ); %} -instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ @@ -7847,8 +9017,20 @@ ins_pipe( pipe_slow ); %} -instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ @@ -7858,8 +9040,31 @@ ins_pipe( pipe_slow ); %} +instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ @@ -7870,7 +9075,7 @@ %} instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ @@ -8104,7 +9309,7 @@ // unsigned values. instruct vsrl2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ins_encode %{ @@ -8114,7 +9319,7 @@ %} instruct vsrl2S_imm(vecS dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %} ins_encode %{ @@ -8123,8 +9328,19 @@ ins_pipe( pipe_slow ); %} -instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ @@ -8134,8 +9350,20 @@ ins_pipe( pipe_slow ); %} -instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} ins_encode %{ @@ -8145,8 +9373,31 @@ ins_pipe( pipe_slow ); %} +instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsrl4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ @@ -8156,7 +9407,7 @@ %} instruct vsrl4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ @@ -8165,8 +9416,19 @@ ins_pipe( pipe_slow ); %} -instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ @@ -8176,8 +9438,20 @@ ins_pipe( pipe_slow ); %} -instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ @@ -8187,8 +9461,31 @@ ins_pipe( pipe_slow ); %} +instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsrl8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ @@ -8198,7 +9495,7 @@ %} instruct vsrl8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ @@ -8207,8 +9504,19 @@ ins_pipe( pipe_slow ); %} -instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} ins_encode %{ @@ -8218,8 +9526,31 @@ ins_pipe( pipe_slow ); %} -instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ @@ -8229,8 +9560,31 @@ ins_pipe( pipe_slow ); %} -instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ @@ -8240,8 +9594,20 @@ ins_pipe( pipe_slow ); %} -instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} ins_encode %{ @@ -8251,8 +9617,31 @@ ins_pipe( pipe_slow ); %} +instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ @@ -8263,7 +9652,7 @@ %} instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ @@ -8493,7 +9882,7 @@ // Shorts/Chars vector arithmetic right shift instruct vsra2S(vecS dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %} ins_encode %{ @@ -8512,8 +9901,19 @@ ins_pipe( pipe_slow ); %} -instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ @@ -8523,8 +9923,20 @@ ins_pipe( pipe_slow ); %} -instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} ins_encode %{ @@ -8534,8 +9946,31 @@ ins_pipe( pipe_slow ); %} +instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra4S(vecD dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8545,7 +9980,7 @@ %} instruct vsra4S_imm(vecD dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 4); + predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8554,8 +9989,19 @@ ins_pipe( pipe_slow ); %} -instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8565,8 +10011,20 @@ ins_pipe( pipe_slow ); %} -instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ @@ -8576,8 +10034,31 @@ ins_pipe( pipe_slow ); %} +instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra8S(vecX dst, vecS shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8587,7 +10068,7 @@ %} instruct vsra8S_imm(vecX dst, immI8 shift) %{ - predicate(n->as_Vector()->length() == 8); + predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8596,8 +10077,19 @@ ins_pipe( pipe_slow ); %} -instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} ins_encode %{ @@ -8607,8 +10099,31 @@ ins_pipe( pipe_slow ); %} -instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); +instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ @@ -8618,8 +10133,31 @@ ins_pipe( pipe_slow ); %} -instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} + ins_encode %{ + int vector_len = 0; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ @@ -8629,8 +10167,20 @@ ins_pipe( pipe_slow ); %} -instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); +instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} ins_encode %{ @@ -8640,8 +10190,31 @@ ins_pipe( pipe_slow ); %} +instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ + predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS dst shift)); + effect(TEMP src); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ @@ -8652,7 +10225,7 @@ %} instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed32S" %} ins_encode %{ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/x86_32.ad --- a/hotspot/src/cpu/x86/vm/x86_32.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/x86_32.ad Tue Nov 24 10:30:23 2015 +0100 @@ -291,9 +291,7 @@ size += 6; // fldcw } if (C->max_vector_size() > 16) { - if(UseAVX <= 2) { - size += 3; // vzeroupper - } + size += 3; // vzeroupper } return size; } @@ -1915,7 +1913,7 @@ if (stub == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; - } + } } %} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/x86/vm/x86_64.ad --- a/hotspot/src/cpu/x86/vm/x86_64.ad Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/x86/vm/x86_64.ad Tue Nov 24 10:30:23 2015 +0100 @@ -536,11 +536,7 @@ #define __ _masm. static int clear_avx_size() { - if(UseAVX > 2) { - return 0; // vzeroupper is ignored - } else { - return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper - } + return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // vzeroupper } // !!!!! 
Special hack to get all types of calls to specify the byte offset @@ -871,7 +867,7 @@ if (framesize > 0) { st->print("\n\t"); st->print("addq rbp, #%d", framesize); - } + } } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp --- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -497,12 +497,15 @@ // 1: getfield // 2: index // 3: index - // 4: ireturn/areturn + // 4: ireturn/areturn/freturn/lreturn/dreturn // NB this is not raw bytecode: index is in machine order u1 *code = method->code_base(); assert(code[0] == Bytecodes::_aload_0 && code[1] == Bytecodes::_getfield && (code[4] == Bytecodes::_ireturn || + code[4] == Bytecodes::_freturn || + code[4] == Bytecodes::_lreturn || + code[4] == Bytecodes::_dreturn || code[4] == Bytecodes::_areturn), "should do"); u2 index = Bytes::get_native_u2(&code[2]); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java Tue Nov 24 10:30:23 2015 +0100 @@ -32,6 +32,7 @@ import jdk.vm.ci.code.InstalledCode; import jdk.vm.ci.code.InvalidInstalledCodeException; import jdk.vm.ci.code.TargetDescription; +import jdk.vm.ci.common.JVMCIError; import jdk.vm.ci.hotspotvmconfig.HotSpotVMField; import jdk.vm.ci.inittimer.InitTimer; import jdk.vm.ci.meta.JavaType; @@ -308,6 +309,8 @@ * {@link HotSpotVMConfig#codeInstallResultCodeTooLarge}, * {@link HotSpotVMConfig#codeInstallResultDependenciesFailed} or * {@link HotSpotVMConfig#codeInstallResultDependenciesInvalid}. + * @throws JVMCIError if there is something wrong with the compiled code or the associated + * metadata. 
*/ native int installCode(TargetDescription target, HotSpotCompiledCode compiledCode, InstalledCode code, HotSpotSpeculationLog speculationLog); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java Tue Nov 24 10:30:23 2015 +0100 @@ -1680,6 +1680,7 @@ @HotSpotVMField(name = "Deoptimization::UnrollBlock::_caller_adjustment", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockCallerAdjustmentOffset; @HotSpotVMField(name = "Deoptimization::UnrollBlock::_number_of_frames", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockNumberOfFramesOffset; @HotSpotVMField(name = "Deoptimization::UnrollBlock::_total_frame_sizes", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockTotalFrameSizesOffset; + @HotSpotVMField(name = "Deoptimization::UnrollBlock::_unpack_kind", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockUnpackKindOffset; @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_sizes", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFrameSizesOffset; @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_pcs", type = "address*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFramePcsOffset; @HotSpotVMField(name = "Deoptimization::UnrollBlock::_initial_info", type = "intptr_t", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockInitialInfoOffset; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp --- 
a/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -66,12 +66,12 @@ features = generic_v9_m; if (detect_niagara()) { - NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on Niagara");) + if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on Niagara"); } features = niagara1_m | T_family_m; } if (detect_M_family()) { - NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on M family");) + if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on M family"); } features = sun4v_m | generic_v9_m | M_family_m | T_family_m; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_GraphBuilder.cpp --- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -707,12 +707,10 @@ BlockBegin* block = bci2block()->at(bci); if (block != NULL && block == parent()->bci2block()->at(bci)) { BlockBegin* new_block = new BlockBegin(block->bci()); -#ifndef PRODUCT if (PrintInitialBlockList) { tty->print_cr("CFG: cloned block %d (bci %d) as block %d for jsr", block->block_id(), block->bci(), new_block->block_id()); } -#endif // copy data from cloned blocked new_block->set_depth_first_number(block->depth_first_number()); if (block->is_set(BlockBegin::parser_loop_header_flag)) new_block->set(BlockBegin::parser_loop_header_flag); @@ -1438,7 +1436,9 @@ bool need_mem_bar = false; if (method()->name() == ciSymbol::object_initializer_name() && - (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields()))) { + (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields()) + || (support_IRIW_for_not_multiple_copy_atomic_cpu && scope()->wrote_volatile()) + )){ need_mem_bar = true; } @@ -1554,6 +1554,9 @@ if (code == 
Bytecodes::_putfield) { scope()->set_wrote_fields(); + if (field->is_volatile()) { + scope()->set_wrote_volatile(); + } } const int offset = !needs_patching ? field->offset() : -1; @@ -3785,12 +3788,10 @@ cont = new BlockBegin(next_bci()); // low number so that continuation gets parsed as early as possible cont->set_depth_first_number(0); -#ifndef PRODUCT if (PrintInitialBlockList) { tty->print_cr("CFG: created block %d (bci %d) as continuation for inline at bci %d", cont->block_id(), cont->bci(), bci()); } -#endif continuation_existed = false; } // Record number of predecessors of continuation block before diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_IR.cpp --- a/hotspot/src/share/vm/c1/c1_IR.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_IR.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -143,6 +143,7 @@ _monitor_pairing_ok = method->has_balanced_monitors(); _wrote_final = false; _wrote_fields = false; + _wrote_volatile = false; _start = NULL; if (osr_bci == -1) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_IR.hpp --- a/hotspot/src/share/vm/c1/c1_IR.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_IR.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -151,6 +151,7 @@ bool _monitor_pairing_ok; // the monitor pairing info bool _wrote_final; // has written final field bool _wrote_fields; // has written fields + bool _wrote_volatile; // has written volatile field BlockBegin* _start; // the start block, successsors are method entries BitMap _requires_phi_function; // bit is set if phi functions at loop headers are necessary for a local variable @@ -187,7 +188,8 @@ bool wrote_final () const { return _wrote_final; } void set_wrote_fields() { _wrote_fields = true; } bool wrote_fields () const { return _wrote_fields; } - + void set_wrote_volatile() { _wrote_volatile = true; } + bool wrote_volatile () const { return _wrote_volatile; } }; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_LIR.cpp --- a/hotspot/src/share/vm/c1/c1_LIR.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -2004,7 +2004,7 @@ // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { - if (code() == lir_cmove) { + if (code() == lir_cmove || code() == lir_cmp) { print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_LIRGenerator.cpp --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1761,7 +1761,7 @@ post_barrier(object.result(), value.result()); } - if (is_volatile && os::is_MP()) { + if (!support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) { __ membar(); } } @@ -1822,6 +1822,10 @@ address = generate_address(object.result(), x->offset(), field_type); } + if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) { + __ membar(); + } + bool needs_atomic_access = is_volatile || AlwaysAtomicAccesses; if (needs_atomic_access && 
!needs_patching) { volatile_field_load(address, reg, info); @@ -2238,6 +2242,10 @@ LIR_Opr value = rlock_result(x, x->basic_type()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) { + __ membar(); + } + get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile()); #if INCLUDE_ALL_GCS @@ -2395,7 +2403,7 @@ if (x->is_volatile() && os::is_MP()) __ membar_release(); put_Object_unsafe(src.result(), off.result(), data.result(), type, x->is_volatile()); - if (x->is_volatile() && os::is_MP()) __ membar(); + if (!support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) __ membar(); } @@ -2794,7 +2802,7 @@ assert(obj->is_valid(), "must be valid"); if (method()->is_synchronized() && GenerateSynchronizationCode) { - LIR_Opr lock = new_register(T_INT); + LIR_Opr lock = syncLockOpr(); __ load_stack_address_monitor(0, lock); CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, SynchronizationEntryBCI), NULL, x->check_flag(Instruction::DeoptimizeOnException)); @@ -3421,14 +3429,18 @@ __ add(result, LIR_OprFact::intConst(InvocationCounter::count_increment), result); __ store(result, counter); if (notify) { - LIR_Opr mask = load_immediate(frequency << InvocationCounter::count_shift, T_INT); - LIR_Opr meth = new_register(T_METADATA); - __ metadata2reg(method->constant_encoding(), meth); - __ logical_and(result, mask, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); + LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); - __ branch(lir_cond_equal, T_INT, overflow); + int freq = frequency << InvocationCounter::count_shift; + if (freq == 0) { + __ branch(lir_cond_always, T_ILLEGAL, overflow); + } else { + LIR_Opr mask = load_immediate(freq, T_INT); + __ logical_and(result, mask, result); + __ 
cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); + __ branch(lir_cond_equal, T_INT, overflow); + } __ branch_destination(overflow->continuation()); } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_LIRGenerator.hpp --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -495,6 +495,7 @@ static LIR_Opr divOutOpr(); static LIR_Opr remOutOpr(); static LIR_Opr shiftCountOpr(); + LIR_Opr syncLockOpr(); LIR_Opr syncTempOpr(); LIR_Opr atomicLockOpr(); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/c1/c1_LinearScan.cpp --- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -6233,9 +6233,19 @@ if (prev_branch->stub() == NULL) { LIR_Op2* prev_cmp = NULL; + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. 
+ LIR_Op2* prev_cmove = NULL; for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; + assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); + } if (prev_op->code() == lir_cmp) { assert(prev_op->as_Op2() != NULL, "branch must be of type LIR_Op2"); prev_cmp = (LIR_Op2*)prev_op; @@ -6252,6 +6262,13 @@ prev_branch->negate_cond(); prev_cmp->set_condition(prev_branch->cond()); instructions->truncate(instructions->length() - 1); + // if we do change the condition, we have to change the cmove as well + if (prev_cmove != NULL) { + prev_cmove->set_condition(prev_branch->cond()); + LIR_Opr t = prev_cmove->in_opr1(); + prev_cmove->set_in_opr1(prev_cmove->in_opr2()); + prev_cmove->set_in_opr2(t); + } } } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/ci/ciMethod.cpp --- a/hotspot/src/share/vm/ci/ciMethod.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/ci/ciMethod.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1262,6 +1262,8 @@ bool ciMethod::is_vanilla_constructor() const { FETCH_FLAG_FROM_VM(is_vanilla_constructor); } bool ciMethod::has_loops () const { FETCH_FLAG_FROM_VM(has_loops); } bool ciMethod::has_jsrs () const { FETCH_FLAG_FROM_VM(has_jsrs); } +bool ciMethod::is_getter () const { FETCH_FLAG_FROM_VM(is_getter); } +bool ciMethod::is_setter () const { FETCH_FLAG_FROM_VM(is_setter); } bool ciMethod::is_accessor () const { FETCH_FLAG_FROM_VM(is_accessor); } bool ciMethod::is_initializer () const { FETCH_FLAG_FROM_VM(is_initializer); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/ci/ciMethod.hpp --- a/hotspot/src/share/vm/ci/ciMethod.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/ci/ciMethod.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -311,6 +311,8 @@ bool is_final_method() const { return 
is_final() || holder()->is_final(); } bool has_loops () const; bool has_jsrs () const; + bool is_getter () const; + bool is_setter () const; bool is_accessor () const; bool is_initializer () const; bool can_be_statically_bound() const { return _can_be_statically_bound; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/ci/ciTypeFlow.cpp --- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1588,6 +1588,7 @@ _exceptions = NULL; _exc_klasses = NULL; _successors = NULL; + _predecessors = new (outer->arena()) GrowableArray(outer->arena(), 1, 0, NULL); _state = new (outer->arena()) StateVector(outer); JsrSet* new_jsrs = new (outer->arena()) JsrSet(outer->arena(), jsrs->size()); @@ -1771,6 +1772,12 @@ break; } } + + // Set predecessor information + for (int i = 0; i < _successors->length(); i++) { + Block* block = _successors->at(i); + block->predecessors()->append(this); + } } return _successors; } @@ -1813,7 +1820,9 @@ } else { klass = handler->catch_klass(); } - _exceptions->append(analyzer->block_at(bci, _jsrs)); + Block* block = analyzer->block_at(bci, _jsrs); + _exceptions->append(block); + block->predecessors()->append(this); _exc_klasses->append(klass); } } @@ -1909,6 +1918,18 @@ st->cr(); } } + if (_predecessors == NULL) { + st->print_cr(" No predecessor information"); + } else { + int num_predecessors = _predecessors->length(); + st->print_cr(" Predecessors : %d", num_predecessors); + for (int i = 0; i < num_predecessors; i++) { + Block* predecessor = _predecessors->at(i); + st->print(" "); + predecessor->print_value_on(st); + st->cr(); + } + } if (_exceptions == NULL) { st->print_cr(" No exception information"); } else { @@ -2270,6 +2291,9 @@ for (SuccIter iter(tail); !iter.done(); iter.next()) { if (iter.succ() == head) { iter.set_succ(clone); + // Update predecessor information + head->predecessors()->remove(tail); + 
clone->predecessors()->append(tail); } } flow_block(tail, temp_vector, temp_set); @@ -2279,6 +2303,9 @@ for (SuccIter iter(clone); !iter.done(); iter.next()) { if (iter.succ() == head) { iter.set_succ(clone); + // Update predecessor information + head->predecessors()->remove(clone); + clone->predecessors()->append(clone); break; } } @@ -2884,6 +2911,69 @@ } // ------------------------------------------------------------------ +// ciTypeFlow::is_dominated_by +// +// Determine if the instruction at bci is dominated by the instruction at dom_bci. +bool ciTypeFlow::is_dominated_by(int bci, int dom_bci) { + assert(!method()->has_jsrs(), "jsrs are not supported"); + + ResourceMark rm; + JsrSet* jsrs = new ciTypeFlow::JsrSet(NULL); + int index = _methodBlocks->block_containing(bci)->index(); + int dom_index = _methodBlocks->block_containing(dom_bci)->index(); + Block* block = get_block_for(index, jsrs, ciTypeFlow::no_create); + Block* dom_block = get_block_for(dom_index, jsrs, ciTypeFlow::no_create); + + // Start block dominates all other blocks + if (start_block()->rpo() == dom_block->rpo()) { + return true; + } + + // Dominated[i] is true if block i is dominated by dom_block + int num_blocks = _methodBlocks->num_blocks(); + bool* dominated = NEW_RESOURCE_ARRAY(bool, num_blocks); + for (int i = 0; i < num_blocks; ++i) { + dominated[i] = true; + } + dominated[start_block()->rpo()] = false; + + // Iterative dominator algorithm + bool changed = true; + while (changed) { + changed = false; + // Use reverse postorder iteration + for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) { + if (blk->is_start()) { + // Ignore start block + continue; + } + // The block is dominated if it is the dominating block + // itself or if all predecessors are dominated. 
+ int index = blk->rpo(); + bool dom = (index == dom_block->rpo()); + if (!dom) { + // Check if all predecessors are dominated + dom = true; + for (int i = 0; i < blk->predecessors()->length(); ++i) { + Block* pred = blk->predecessors()->at(i); + if (!dominated[pred->rpo()]) { + dom = false; + break; + } + } + } + // Update dominator information + if (dominated[index] != dom) { + changed = true; + dominated[index] = dom; + } + } + } + // block dominated by dom_block? + return dominated[block->rpo()]; +} + +// ------------------------------------------------------------------ // ciTypeFlow::record_failure() // The ciTypeFlow object keeps track of failure reasons separately from the ciEnv. // This is required because there is not a 1-1 relation between the ciEnv and diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/ci/ciTypeFlow.hpp --- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -529,6 +529,7 @@ GrowableArray* _exceptions; GrowableArray* _exc_klasses; GrowableArray* _successors; + GrowableArray* _predecessors; StateVector* _state; JsrSet* _jsrs; @@ -617,6 +618,12 @@ return _successors; } + // Predecessors of this block (including exception edges) + GrowableArray* predecessors() { + assert(_predecessors != NULL, "must be filled in"); + return _predecessors; + } + // Get the exceptional successors for this Block. GrowableArray* exceptions() { if (_exceptions == NULL) { @@ -941,6 +948,9 @@ // Perform type inference flow analysis. 
void do_flow(); + // Determine if bci is dominated by dom_bci + bool is_dominated_by(int bci, int dom_bci); + void print_on(outputStream* st) const PRODUCT_RETURN; void rpo_print_on(outputStream* st) const PRODUCT_RETURN; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/classfile/javaClasses.cpp --- a/hotspot/src/share/vm/classfile/javaClasses.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/classfile/javaClasses.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -28,6 +28,7 @@ #include "classfile/stringTable.hpp" #include "classfile/vmSymbols.hpp" #include "code/debugInfo.hpp" +#include "code/dependencyContext.hpp" #include "code/pcDesc.hpp" #include "interpreter/interpreter.hpp" #include "memory/oopFactory.hpp" @@ -3216,14 +3217,16 @@ } } -nmethodBucket* java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(oop call_site) { +DependencyContext java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(oop call_site) { assert(java_lang_invoke_MethodHandleNatives_CallSiteContext::is_instance(call_site), ""); - return (nmethodBucket*) (address) call_site->long_field(_vmdependencies_offset); -} - -void java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(oop call_site, nmethodBucket* context) { - assert(java_lang_invoke_MethodHandleNatives_CallSiteContext::is_instance(call_site), ""); - call_site->long_field_put(_vmdependencies_offset, (jlong) (address) context); + intptr_t* vmdeps_addr = (intptr_t*)call_site->address_field_addr(_vmdependencies_offset); +#ifndef ASSERT + DependencyContext dep_ctx(vmdeps_addr); +#else + // Verify that call_site isn't moved during DependencyContext lifetime. 
+ DependencyContext dep_ctx(vmdeps_addr, Handle(call_site)); +#endif // ASSERT + return dep_ctx; } // Support for java_security_AccessControlContext diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/classfile/javaClasses.hpp --- a/hotspot/src/share/vm/classfile/javaClasses.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/classfile/javaClasses.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -1212,6 +1212,8 @@ #define CALLSITECONTEXT_INJECTED_FIELDS(macro) \ macro(java_lang_invoke_MethodHandleNatives_CallSiteContext, vmdependencies, intptr_signature, false) +class DependencyContext; + class java_lang_invoke_MethodHandleNatives_CallSiteContext : AllStatic { friend class JavaClasses; @@ -1222,8 +1224,7 @@ public: // Accessors - static nmethodBucket* vmdependencies(oop context); - static void set_vmdependencies(oop context, nmethodBucket* bucket); + static DependencyContext vmdependencies(oop context); // Testers static bool is_subclass(Klass* klass) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/classfile/vmSymbols.hpp --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -109,6 +109,7 @@ template(java_io_ByteArrayInputStream, "java/io/ByteArrayInputStream") \ template(java_io_Serializable, "java/io/Serializable") \ template(java_util_Arrays, "java/util/Arrays") \ + template(java_util_Objects, "java/util/Objects") \ template(java_util_Properties, "java/util/Properties") \ template(java_util_Vector, "java/util/Vector") \ template(java_util_AbstractList, "java/util/AbstractList") \ @@ -883,6 +884,9 @@ do_intrinsic(_equalsL, java_lang_StringLatin1,equals_name, equalsB_signature, F_S) \ do_intrinsic(_equalsU, java_lang_StringUTF16, equals_name, equalsB_signature, F_S) \ \ + do_intrinsic(_Objects_checkIndex, java_util_Objects, checkIndex_name, Objects_checkIndex_signature, F_S) \ + do_signature(Objects_checkIndex_signature, 
"(IILjava/util/function/BiFunction;)I") \ + \ do_class(java_nio_Buffer, "java/nio/Buffer") \ do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \ do_name( checkIndex_name, "checkIndex") \ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/codeCache.cpp --- a/hotspot/src/share/vm/code/codeCache.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/code/codeCache.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -133,18 +133,47 @@ address CodeCache::_low_bound = 0; address CodeCache::_high_bound = 0; -int CodeCache::_number_of_blobs = 0; -int CodeCache::_number_of_adapters = 0; -int CodeCache::_number_of_nmethods = 0; int CodeCache::_number_of_nmethods_with_dependencies = 0; bool CodeCache::_needs_cache_clean = false; nmethod* CodeCache::_scavenge_root_nmethods = NULL; -int CodeCache::_codemem_full_count = 0; // Initialize array of CodeHeaps GrowableArray* CodeCache::_heaps = new(ResourceObj::C_HEAP, mtCode) GrowableArray (CodeBlobType::All, true); +void CodeCache::check_heap_sizes(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, bool all_set) { + size_t total_size = non_nmethod_size + profiled_size + non_profiled_size; + // Prepare error message + const char* error = "Invalid code heap sizes"; + err_msg message("NonNMethodCodeHeapSize (%zuK) + ProfiledCodeHeapSize (%zuK) + NonProfiledCodeHeapSize (%zuK) = %zuK", + non_nmethod_size/K, profiled_size/K, non_profiled_size/K, total_size/K); + + if (total_size > cache_size) { + // Some code heap sizes were explicitly set: total_size must be <= cache_size + message.append(" is greater than ReservedCodeCacheSize (%zuK).", cache_size/K); + vm_exit_during_initialization(error, message); + } else if (all_set && total_size != cache_size) { + // All code heap sizes were explicitly set: total_size must equal cache_size + message.append(" is not equal to ReservedCodeCacheSize (%zuK).", cache_size/K); + vm_exit_during_initialization(error, message); 
+ } +} + void CodeCache::initialize_heaps() { + bool non_nmethod_set = FLAG_IS_CMDLINE(NonNMethodCodeHeapSize); + bool profiled_set = FLAG_IS_CMDLINE(ProfiledCodeHeapSize); + bool non_profiled_set = FLAG_IS_CMDLINE(NonProfiledCodeHeapSize); + size_t min_size = os::vm_page_size(); + size_t cache_size = ReservedCodeCacheSize; + size_t non_nmethod_size = NonNMethodCodeHeapSize; + size_t profiled_size = ProfiledCodeHeapSize; + size_t non_profiled_size = NonProfiledCodeHeapSize; + // Check if total size set via command line flags exceeds the reserved size + check_heap_sizes((non_nmethod_set ? non_nmethod_size : min_size), + (profiled_set ? profiled_size : min_size), + (non_profiled_set ? non_profiled_size : min_size), + cache_size, + non_nmethod_set && profiled_set && non_profiled_set); + // Determine size of compiler buffers size_t code_buffers_size = 0; #ifdef COMPILER1 @@ -159,51 +188,94 @@ code_buffers_size += c2_count * C2Compiler::initial_code_buffer_size(); #endif + // Increase default non_nmethod_size to account for compiler buffers + if (!non_nmethod_set) { + non_nmethod_size += code_buffers_size; + } // Calculate default CodeHeap sizes if not set by user - if (!FLAG_IS_CMDLINE(NonNMethodCodeHeapSize) && !FLAG_IS_CMDLINE(ProfiledCodeHeapSize) - && !FLAG_IS_CMDLINE(NonProfiledCodeHeapSize)) { - // Increase default NonNMethodCodeHeapSize to account for compiler buffers - FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, NonNMethodCodeHeapSize + code_buffers_size); - + if (!non_nmethod_set && !profiled_set && !non_profiled_set) { // Check if we have enough space for the non-nmethod code heap - if (ReservedCodeCacheSize > NonNMethodCodeHeapSize) { - // Use the default value for NonNMethodCodeHeapSize and one half of the - // remaining size for non-profiled methods and one half for profiled methods - size_t remaining_size = ReservedCodeCacheSize - NonNMethodCodeHeapSize; - size_t profiled_size = remaining_size / 2; - size_t non_profiled_size = remaining_size - 
profiled_size; - FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, profiled_size); - FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, non_profiled_size); + if (cache_size > non_nmethod_size) { + // Use the default value for non_nmethod_size and one half of the + // remaining size for non-profiled and one half for profiled methods + size_t remaining_size = cache_size - non_nmethod_size; + profiled_size = remaining_size / 2; + non_profiled_size = remaining_size - profiled_size; } else { // Use all space for the non-nmethod heap and set other heaps to minimal size - FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, ReservedCodeCacheSize - os::vm_page_size() * 2); - FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, os::vm_page_size()); - FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, os::vm_page_size()); + non_nmethod_size = cache_size - 2 * min_size; + profiled_size = min_size; + non_profiled_size = min_size; + } + } else if (!non_nmethod_set || !profiled_set || !non_profiled_set) { + // The user explicitly set some code heap sizes. Increase or decrease the (default) + // sizes of the other code heaps accordingly. First adapt non-profiled and profiled + // code heap sizes and then only change non-nmethod code heap size if still necessary. 
+ intx diff_size = cache_size - (non_nmethod_size + profiled_size + non_profiled_size); + if (non_profiled_set) { + if (!profiled_set) { + // Adapt size of profiled code heap + if (diff_size < 0 && ((intx)profiled_size + diff_size) <= 0) { + // Not enough space available, set to minimum size + diff_size += profiled_size - min_size; + profiled_size = min_size; + } else { + profiled_size += diff_size; + diff_size = 0; + } + } + } else if (profiled_set) { + // Adapt size of non-profiled code heap + if (diff_size < 0 && ((intx)non_profiled_size + diff_size) <= 0) { + // Not enough space available, set to minimum size + diff_size += non_profiled_size - min_size; + non_profiled_size = min_size; + } else { + non_profiled_size += diff_size; + diff_size = 0; + } + } else if (non_nmethod_set) { + // Distribute remaining size between profiled and non-profiled code heaps + diff_size = cache_size - non_nmethod_size; + profiled_size = diff_size / 2; + non_profiled_size = diff_size - profiled_size; + diff_size = 0; + } + if (diff_size != 0) { + // Use non-nmethod code heap for remaining space requirements + assert(!non_nmethod_set && ((intx)non_nmethod_size + diff_size) > 0, "sanity"); + non_nmethod_size += diff_size; } } // We do not need the profiled CodeHeap, use all space for the non-profiled CodeHeap if(!heap_available(CodeBlobType::MethodProfiled)) { - FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, NonProfiledCodeHeapSize + ProfiledCodeHeapSize); - FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, 0); + non_profiled_size += profiled_size; + profiled_size = 0; } // We do not need the non-profiled CodeHeap, use all space for the non-nmethod CodeHeap if(!heap_available(CodeBlobType::MethodNonProfiled)) { - FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, NonNMethodCodeHeapSize + NonProfiledCodeHeapSize); - FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, 0); + non_nmethod_size += non_profiled_size; + non_profiled_size = 0; + } + // Make sure we have enough space for VM internal code + uint 
min_code_cache_size = CodeCacheMinimumUseSpace DEBUG_ONLY(* 3); + if (non_nmethod_size < (min_code_cache_size + code_buffers_size)) { + vm_exit_during_initialization(err_msg( + "Not enough space in non-nmethod code heap to run VM: %zuK < %zuK", + non_nmethod_size/K, (min_code_cache_size + code_buffers_size)/K)); } - // Make sure we have enough space for VM internal code - uint min_code_cache_size = CodeCacheMinimumUseSpace DEBUG_ONLY(* 3); - if (NonNMethodCodeHeapSize < (min_code_cache_size + code_buffers_size)) { - vm_exit_during_initialization("Not enough space in non-nmethod code heap to run VM."); - } - guarantee(NonProfiledCodeHeapSize + ProfiledCodeHeapSize + NonNMethodCodeHeapSize <= ReservedCodeCacheSize, "Size check"); + // Verify sizes and update flag values + assert(non_profiled_size + profiled_size + non_nmethod_size == cache_size, "Invalid code heap sizes"); + FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, non_nmethod_size); + FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, profiled_size); + FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, non_profiled_size); // Align CodeHeaps size_t alignment = heap_alignment(); - size_t non_method_size = align_size_up(NonNMethodCodeHeapSize, alignment); - size_t profiled_size = align_size_down(ProfiledCodeHeapSize, alignment); + non_nmethod_size = align_size_up(non_nmethod_size, alignment); + profiled_size = align_size_down(profiled_size, alignment); // Reserve one continuous chunk of memory for CodeHeaps and split it into // parts for the individual heaps. 
The memory layout looks like this: @@ -212,9 +284,9 @@ // Profiled nmethods // Non-nmethods // ---------- low ------------ - ReservedCodeSpace rs = reserve_heap_memory(ReservedCodeCacheSize); - ReservedSpace non_method_space = rs.first_part(non_method_size); - ReservedSpace rest = rs.last_part(non_method_size); + ReservedCodeSpace rs = reserve_heap_memory(cache_size); + ReservedSpace non_method_space = rs.first_part(non_nmethod_size); + ReservedSpace rest = rs.last_part(non_nmethod_size); ReservedSpace profiled_space = rest.first_part(profiled_size); ReservedSpace non_profiled_space = rest.last_part(profiled_size); @@ -420,42 +492,41 @@ } } print_trace("allocation", cb, size); - _number_of_blobs++; return cb; } void CodeCache::free(CodeBlob* cb) { assert_locked_or_safepoint(CodeCache_lock); - + CodeHeap* heap = get_code_heap(cb); print_trace("free", cb); if (cb->is_nmethod()) { - _number_of_nmethods--; + heap->set_nmethod_count(heap->nmethod_count() - 1); if (((nmethod *)cb)->has_dependencies()) { _number_of_nmethods_with_dependencies--; } } if (cb->is_adapter_blob()) { - _number_of_adapters--; + heap->set_adapter_count(heap->adapter_count() - 1); } - _number_of_blobs--; // Get heap for given CodeBlob and deallocate get_code_heap(cb)->deallocate(cb); - assert(_number_of_blobs >= 0, "sanity check"); + assert(heap->blob_count() >= 0, "sanity check"); } void CodeCache::commit(CodeBlob* cb) { // this is called by nmethod::nmethod, which must already own CodeCache_lock assert_locked_or_safepoint(CodeCache_lock); + CodeHeap* heap = get_code_heap(cb); if (cb->is_nmethod()) { - _number_of_nmethods++; + heap->set_nmethod_count(heap->nmethod_count() + 1); if (((nmethod *)cb)->has_dependencies()) { _number_of_nmethods_with_dependencies++; } } if (cb->is_adapter_blob()) { - _number_of_adapters++; + heap->set_adapter_count(heap->adapter_count() + 1); } // flush the hardware I-cache @@ -577,11 +648,9 @@ assert(cur->on_scavenge_root_list(), "else shouldn't be on this list"); bool 
is_live = (!cur->is_zombie() && !cur->is_unloaded()); -#ifndef PRODUCT if (TraceScavenge) { cur->print_on(tty, is_live ? "scavenge root" : "dead scavenge root"); tty->cr(); } -#endif //PRODUCT if (is_live) { // Perform cur->oops_do(f), maybe just once per nmethod. f->do_code_blob(cur); @@ -774,6 +843,55 @@ } } +int CodeCache::blob_count(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? heap->blob_count() : 0; +} + +int CodeCache::blob_count() { + int count = 0; + FOR_ALL_HEAPS(heap) { + count += (*heap)->blob_count(); + } + return count; +} + +int CodeCache::nmethod_count(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? heap->nmethod_count() : 0; +} + +int CodeCache::nmethod_count() { + int count = 0; + FOR_ALL_HEAPS(heap) { + count += (*heap)->nmethod_count(); + } + return count; +} + +int CodeCache::adapter_count(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? heap->adapter_count() : 0; +} + +int CodeCache::adapter_count() { + int count = 0; + FOR_ALL_HEAPS(heap) { + count += (*heap)->adapter_count(); + } + return count; +} + +address CodeCache::low_bound(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? (address)heap->low_boundary() : NULL; +} + +address CodeCache::high_bound(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? 
(address)heap->high_boundary() : NULL; +} + size_t CodeCache::capacity() { size_t cap = 0; FOR_ALL_HEAPS(heap) { @@ -863,6 +981,9 @@ initialize_heaps(); } else { // Use a single code heap + FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, 0); + FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, 0); + FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, 0); ReservedCodeSpace rs = reserve_heap_memory(ReservedCodeCacheSize); add_heap(rs, "CodeCache", CodeBlobType::All); } @@ -1104,9 +1225,8 @@ CodeHeap* heap = get_code_heap(code_blob_type); assert(heap != NULL, "heap is null"); - if (!heap->was_full() || print) { + if ((heap->full_count() == 0) || print) { // Not yet reported for this heap, report - heap->report_full(); if (SegmentedCodeCache) { warning("%s is full. Compiler has been disabled.", get_code_heap_name(code_blob_type)); warning("Try increasing the code heap size using -XX:%s=", get_code_heap_flag_name(code_blob_type)); @@ -1125,18 +1245,19 @@ tty->print("%s", s.as_string()); } - _codemem_full_count++; + heap->report_full(); + EventCodeCacheFull event; if (event.should_commit()) { event.set_codeBlobType((u1)code_blob_type); event.set_startAddress((u8)heap->low_boundary()); event.set_commitedTopAddress((u8)heap->high()); event.set_reservedTopAddress((u8)heap->high_boundary()); - event.set_entryCount(nof_blobs()); - event.set_methodCount(nof_nmethods()); - event.set_adaptorCount(nof_adapters()); + event.set_entryCount(heap->blob_count()); + event.set_methodCount(heap->nmethod_count()); + event.set_adaptorCount(heap->adapter_count()); event.set_unallocatedCapacity(heap->unallocated_capacity()/K); - event.set_fullCount(_codemem_full_count); + event.set_fullCount(heap->full_count()); event.commit(); } } @@ -1360,7 +1481,7 @@ if (detailed) { st->print_cr(" total_blobs=" UINT32_FORMAT " nmethods=" UINT32_FORMAT " adapters=" UINT32_FORMAT, - nof_blobs(), nof_nmethods(), nof_adapters()); + blob_count(), nmethod_count(), adapter_count()); st->print_cr(" compilation: %s", 
CompileBroker::should_compile_new_jobs() ? "enabled" : Arguments::mode() == Arguments::_int ? "disabled (interpreter mode)" : @@ -1392,6 +1513,6 @@ void CodeCache::log_state(outputStream* st) { st->print(" total_blobs='" UINT32_FORMAT "' nmethods='" UINT32_FORMAT "'" " adapters='" UINT32_FORMAT "' free_code_cache='" SIZE_FORMAT "'", - nof_blobs(), nof_nmethods(), nof_adapters(), + blob_count(), nmethod_count(), adapter_count(), unallocated_capacity()); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/codeCache.hpp --- a/hotspot/src/share/vm/code/codeCache.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/code/codeCache.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -85,26 +85,23 @@ static address _low_bound; // Lower bound of CodeHeap addresses static address _high_bound; // Upper bound of CodeHeap addresses - static int _number_of_blobs; // Total number of CodeBlobs in the cache - static int _number_of_adapters; // Total number of Adapters in the cache - static int _number_of_nmethods; // Total number of nmethods in the cache static int _number_of_nmethods_with_dependencies; // Total number of nmethods with dependencies static bool _needs_cache_clean; // True if inline caches of the nmethods needs to be flushed static nmethod* _scavenge_root_nmethods; // linked via nm->scavenge_root_link() - static int _codemem_full_count; // Number of times a CodeHeap in the cache was full static void mark_scavenge_root_nmethods() PRODUCT_RETURN; static void verify_perm_nmethods(CodeBlobClosure* f_or_null) PRODUCT_RETURN; // CodeHeap management static void initialize_heaps(); // Initializes the CodeHeaps + // Check the code heap sizes set by the user via command line + static void check_heap_sizes(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, bool all_set); // Creates a new heap with the given name and size, containing CodeBlobs of the given type static void add_heap(ReservedSpace rs, const char* name, int 
code_blob_type); static CodeHeap* get_code_heap(const CodeBlob* cb); // Returns the CodeHeap for the given CodeBlob static CodeHeap* get_code_heap(int code_blob_type); // Returns the CodeHeap for the given CodeBlobType // Returns the name of the VM option to set the size of the corresponding CodeHeap static const char* get_code_heap_flag_name(int code_blob_type); - static bool heap_available(int code_blob_type); // Returns true if an own CodeHeap for the given CodeBlobType is available static size_t heap_alignment(); // Returns the alignment of the CodeHeaps in bytes static ReservedCodeSpace reserve_heap_memory(size_t size); // Reserves one continuous chunk of memory for the CodeHeaps @@ -139,9 +136,12 @@ static CodeBlob* find_blob_unsafe(void* start); // Same as find_blob but does not fail if looking up a zombie method static nmethod* find_nmethod(void* start); // Returns the nmethod containing the given address - static int nof_blobs() { return _number_of_blobs; } // Returns the total number of CodeBlobs in the cache - static int nof_adapters() { return _number_of_adapters; } // Returns the total number of Adapters in the cache - static int nof_nmethods() { return _number_of_nmethods; } // Returns the total number of nmethods in the cache + static int blob_count(); // Returns the total number of CodeBlobs in the cache + static int blob_count(int code_blob_type); + static int adapter_count(); // Returns the total number of Adapters in the cache + static int adapter_count(int code_blob_type); + static int nmethod_count(); // Returns the total number of nmethods in the cache + static int nmethod_count(int code_blob_type); // GC support static void gc_epilogue(); @@ -177,7 +177,9 @@ // The full limits of the codeCache static address low_bound() { return _low_bound; } + static address low_bound(int code_blob_type); static address high_bound() { return _high_bound; } + static address high_bound(int code_blob_type); // Profiling static size_t capacity(); @@ -191,6 
+193,9 @@ static void set_needs_cache_clean(bool v) { _needs_cache_clean = v; } static void clear_inline_caches(); // clear all inline caches + // Returns true if an own CodeHeap for the given CodeBlobType is available + static bool heap_available(int code_blob_type); + // Returns the CodeBlobType for the given nmethod static int get_code_blob_type(nmethod* nm) { return get_code_heap(nm)->code_blob_type(); @@ -239,7 +244,10 @@ // tells how many nmethods have dependencies static int number_of_nmethods_with_dependencies(); - static int get_codemem_full_count() { return _codemem_full_count; } + static int get_codemem_full_count(int code_blob_type) { + CodeHeap* heap = get_code_heap(code_blob_type); + return (heap != NULL) ? heap->full_count() : 0; + } }; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/dependencyContext.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/code/dependencyContext.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/nmethod.hpp" +#include "code/dependencies.hpp" +#include "code/dependencyContext.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/atomic.hpp" +#include "runtime/perfData.hpp" +#include "utilities/exceptions.hpp" + +PerfCounter* DependencyContext::_perf_total_buckets_allocated_count = NULL; +PerfCounter* DependencyContext::_perf_total_buckets_deallocated_count = NULL; +PerfCounter* DependencyContext::_perf_total_buckets_stale_count = NULL; +PerfCounter* DependencyContext::_perf_total_buckets_stale_acc_count = NULL; + +void dependencyContext_init() { + DependencyContext::init(); +} + +void DependencyContext::init() { + if (UsePerfData) { + EXCEPTION_MARK; + _perf_total_buckets_allocated_count = + PerfDataManager::create_counter(SUN_CI, "nmethodBucketsAllocated", PerfData::U_Events, CHECK); + _perf_total_buckets_deallocated_count = + PerfDataManager::create_counter(SUN_CI, "nmethodBucketsDeallocated", PerfData::U_Events, CHECK); + _perf_total_buckets_stale_count = + PerfDataManager::create_counter(SUN_CI, "nmethodBucketsStale", PerfData::U_Events, CHECK); + _perf_total_buckets_stale_acc_count = + PerfDataManager::create_counter(SUN_CI, "nmethodBucketsStaleAccumulated", PerfData::U_Events, CHECK); + } +} + +// +// Walk the list of dependent nmethods searching for nmethods which +// are dependent on the changes that were passed in and mark them for +// deoptimization. Returns the number of nmethods found. +// +int DependencyContext::mark_dependent_nmethods(DepChange& changes) { + int found = 0; + for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) { + nmethod* nm = b->get_nmethod(); + // since dependencies aren't removed until an nmethod becomes a zombie, + // the dependency list may contain nmethods which aren't alive. 
+ if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) { + if (TraceDependencies) { + ResourceMark rm; + tty->print_cr("Marked for deoptimization"); + changes.print(); + nm->print(); + nm->print_dependencies(); + } + nm->mark_for_deoptimization(); + found++; + } + } + return found; +} + +// +// Add an nmethod to the dependency context. +// It's possible that an nmethod has multiple dependencies on a klass +// so a count is kept for each bucket to guarantee that creation and +// deletion of dependencies is consistent. +// +void DependencyContext::add_dependent_nmethod(nmethod* nm, bool expunge) { + assert_lock_strong(CodeCache_lock); + for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) { + if (nm == b->get_nmethod()) { + b->increment(); + return; + } + } + set_dependencies(new nmethodBucket(nm, dependencies())); + if (UsePerfData) { + _perf_total_buckets_allocated_count->inc(); + } + if (expunge) { + // Remove stale entries from the list. + expunge_stale_entries(); + } +} + +// +// Remove an nmethod dependency from the context. +// Decrement count of the nmethod in the dependency list and, optionally, remove +// the bucket completely when the count goes to 0. This method must find +// a corresponding bucket otherwise there's a bug in the recording of dependencies. +// Can be called concurrently by parallel GC threads. 
+// +void DependencyContext::remove_dependent_nmethod(nmethod* nm, bool expunge) { + assert_locked_or_safepoint(CodeCache_lock); + nmethodBucket* first = dependencies(); + nmethodBucket* last = NULL; + for (nmethodBucket* b = first; b != NULL; b = b->next()) { + if (nm == b->get_nmethod()) { + int val = b->decrement(); + guarantee(val >= 0, "Underflow: %d", val); + if (val == 0) { + if (expunge) { + if (last == NULL) { + set_dependencies(b->next()); + } else { + last->set_next(b->next()); + } + delete b; + if (UsePerfData) { + _perf_total_buckets_deallocated_count->inc(); + } + } else { + // Mark the context as having stale entries, since it is not safe to + // expunge the list right now. + set_has_stale_entries(true); + if (UsePerfData) { + _perf_total_buckets_stale_count->inc(); + _perf_total_buckets_stale_acc_count->inc(); + } + } + } + if (expunge) { + // Remove stale entries from the list. + expunge_stale_entries(); + } + return; + } + last = b; + } +#ifdef ASSERT + tty->print_raw_cr("### can't find dependent nmethod"); + nm->print(); +#endif // ASSERT + ShouldNotReachHere(); +} + +// +// Reclaim all unused buckets. +// +void DependencyContext::expunge_stale_entries() { + assert_locked_or_safepoint(CodeCache_lock); + if (!has_stale_entries()) { + assert(!find_stale_entries(), "inconsistent info"); + return; + } + nmethodBucket* first = dependencies(); + nmethodBucket* last = NULL; + int removed = 0; + for (nmethodBucket* b = first; b != NULL;) { + assert(b->count() >= 0, "bucket count: %d", b->count()); + nmethodBucket* next = b->next(); + if (b->count() == 0) { + if (last == NULL) { + first = next; + } else { + last->set_next(next); + } + removed++; + delete b; + // last stays the same. 
+ } else { + last = b; + } + b = next; + } + set_dependencies(first); + set_has_stale_entries(false); + if (UsePerfData && removed > 0) { + _perf_total_buckets_deallocated_count->inc(removed); + _perf_total_buckets_stale_count->dec(removed); + } +} + +// +// Invalidate all dependencies in the context +int DependencyContext::remove_all_dependents() { + assert_locked_or_safepoint(CodeCache_lock); + nmethodBucket* b = dependencies(); + set_dependencies(NULL); + int marked = 0; + int removed = 0; + while (b != NULL) { + nmethod* nm = b->get_nmethod(); + if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization()) { + nm->mark_for_deoptimization(); + marked++; + } + nmethodBucket* next = b->next(); + removed++; + delete b; + b = next; + } + set_has_stale_entries(false); + if (UsePerfData && removed > 0) { + _perf_total_buckets_deallocated_count->inc(removed); + } + return marked; +} + +#ifndef PRODUCT +void DependencyContext::print_dependent_nmethods(bool verbose) { + int idx = 0; + for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) { + nmethod* nm = b->get_nmethod(); + tty->print("[%d] count=%d { ", idx++, b->count()); + if (!verbose) { + nm->print_on(tty, "nmethod"); + tty->print_cr(" } "); + } else { + nm->print(); + nm->print_dependencies(); + tty->print_cr("--- } "); + } + } +} + +bool DependencyContext::is_dependent_nmethod(nmethod* nm) { + for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) { + if (nm == b->get_nmethod()) { +#ifdef ASSERT + int count = b->count(); + assert(count >= 0, "count shouldn't be negative: %d", count); +#endif + return true; + } + } + return false; +} + +bool DependencyContext::find_stale_entries() { + for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) { + if (b->count() == 0) return true; + } + return false; +} + +#endif //PRODUCT + +int nmethodBucket::decrement() { + return Atomic::add(-1, (volatile int *)&_count); +} + +/////////////// Unit tests /////////////// + +#ifndef 
PRODUCT + +class TestDependencyContext { + public: + nmethod* _nmethods[3]; + + intptr_t _dependency_context; + + TestDependencyContext() : _dependency_context(DependencyContext::EMPTY) { + CodeCache_lock->lock_without_safepoint_check(); + + DependencyContext depContext(&_dependency_context); + + _nmethods[0] = reinterpret_cast(0x8 * 0); + _nmethods[1] = reinterpret_cast(0x8 * 1); + _nmethods[2] = reinterpret_cast(0x8 * 2); + + depContext.add_dependent_nmethod(_nmethods[2]); + depContext.add_dependent_nmethod(_nmethods[1]); + depContext.add_dependent_nmethod(_nmethods[0]); + } + + ~TestDependencyContext() { + wipe(); + CodeCache_lock->unlock(); + } + + static void testRemoveDependentNmethod(int id, bool delete_immediately) { + TestDependencyContext c; + DependencyContext depContext(&c._dependency_context); + assert(!has_stale_entries(depContext), "check"); + + nmethod* nm = c._nmethods[id]; + depContext.remove_dependent_nmethod(nm, delete_immediately); + + if (!delete_immediately) { + assert(has_stale_entries(depContext), "check"); + assert(depContext.is_dependent_nmethod(nm), "check"); + depContext.expunge_stale_entries(); + } + + assert(!has_stale_entries(depContext), "check"); + assert(!depContext.is_dependent_nmethod(nm), "check"); + } + + static void testRemoveDependentNmethod() { + testRemoveDependentNmethod(0, false); + testRemoveDependentNmethod(1, false); + testRemoveDependentNmethod(2, false); + + testRemoveDependentNmethod(0, true); + testRemoveDependentNmethod(1, true); + testRemoveDependentNmethod(2, true); + } + + static void test() { + testRemoveDependentNmethod(); + } + + static bool has_stale_entries(DependencyContext ctx) { + assert(ctx.has_stale_entries() == ctx.find_stale_entries(), "check"); + return ctx.has_stale_entries(); + } + + void wipe() { + DependencyContext ctx(&_dependency_context); + nmethodBucket* b = ctx.dependencies(); + ctx.set_dependencies(NULL); + ctx.set_has_stale_entries(false); + while (b != NULL) { + nmethodBucket* next = 
b->next(); + delete b; + b = next; + } + } +}; + +void TestDependencyContext_test() { + TestDependencyContext::test(); +} + +#endif // PRODUCT diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/dependencyContext.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/code/dependencyContext.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP +#define SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "runtime/handles.hpp" +#include "runtime/perfData.hpp" + +class nmethod; +class DepChange; + +// +// nmethodBucket is used to record dependent nmethods for +// deoptimization. nmethod dependencies are actually <klass, method> +// pairs but we really only care about the klass part for purposes of +// finding nmethods which might need to be deoptimized.
Instead of +// recording the method, a count of how many times a particular nmethod +// was recorded is kept. This ensures that any recording errors are +// noticed since an nmethod should be removed as many times as it's +// added. +// +class nmethodBucket: public CHeapObj { + friend class VMStructs; + private: + nmethod* _nmethod; + int _count; + nmethodBucket* _next; + + public: + nmethodBucket(nmethod* nmethod, nmethodBucket* next) : + _nmethod(nmethod), _next(next), _count(1) {} + + int count() { return _count; } + int increment() { _count += 1; return _count; } + int decrement(); + nmethodBucket* next() { return _next; } + void set_next(nmethodBucket* b) { _next = b; } + nmethod* get_nmethod() { return _nmethod; } +}; + +// +// Utility class to manipulate nmethod dependency context. +// The context consists of nmethodBucket* (a head of a linked list) +// and a boolean flag (does the list contain stale entries). The structure is +// encoded as an intptr_t: lower bit is used for the flag. It is possible since +// nmethodBucket* is aligned - the structure is malloc'ed in C heap. +// Dependency context can be attached either to an InstanceKlass (_dep_context field) +// or CallSiteContext oop for call_site_target dependencies (see javaClasses.hpp). +// DependencyContext class operates on some location which holds an intptr_t value.
+// +class DependencyContext : public StackObj { + friend class VMStructs; + friend class TestDependencyContext; + private: + enum TagBits { _has_stale_entries_bit = 1, _has_stale_entries_mask = 1 }; + + intptr_t* _dependency_context_addr; + + void set_dependencies(nmethodBucket* b) { + assert((intptr_t(b) & _has_stale_entries_mask) == 0, "should be aligned"); + if (has_stale_entries()) { + *_dependency_context_addr = intptr_t(b) | _has_stale_entries_mask; + } else { + *_dependency_context_addr = intptr_t(b); + } + } + + void set_has_stale_entries(bool x) { + if (x) { + *_dependency_context_addr |= _has_stale_entries_mask; + } else { + *_dependency_context_addr &= ~_has_stale_entries_mask; + } + } + + nmethodBucket* dependencies() { + intptr_t value = *_dependency_context_addr; + return (nmethodBucket*) (value & ~_has_stale_entries_mask); + } + + bool has_stale_entries() const { + intptr_t value = *_dependency_context_addr; + return (value & _has_stale_entries_mask) != 0; + } + + static PerfCounter* _perf_total_buckets_allocated_count; + static PerfCounter* _perf_total_buckets_deallocated_count; + static PerfCounter* _perf_total_buckets_stale_count; + static PerfCounter* _perf_total_buckets_stale_acc_count; + + public: +#ifdef ASSERT + // Verification for dependency contexts rooted at Java objects. + Handle _base; // non-NULL if dependency context resides in an oop (e.g. CallSite). + oop _base_oop; + + DependencyContext(intptr_t* addr, Handle base = Handle()) + : _dependency_context_addr(addr), _base(base) + { + _base_oop = _base(); + } + + ~DependencyContext() { + // Base oop relocation invalidates _dependency_context_addr. 
+ assert(_base_oop == _base(), "base oop relocation is forbidden"); + } +#else + DependencyContext(intptr_t* addr) : _dependency_context_addr(addr) {} +#endif // ASSERT + + static const intptr_t EMPTY = 0; // dependencies = NULL, has_stale_entries = false + + static void init(); + + int mark_dependent_nmethods(DepChange& changes); + void add_dependent_nmethod(nmethod* nm, bool expunge_stale_entries = false); + void remove_dependent_nmethod(nmethod* nm, bool expunge_stale_entries = false); + int remove_all_dependents(); + + void expunge_stale_entries(); + +#ifndef PRODUCT + void print_dependent_nmethods(bool verbose); + bool is_dependent_nmethod(nmethod* nm); + bool find_stale_entries(); +#endif //PRODUCT +}; +#endif // SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/nmethod.cpp --- a/hotspot/src/share/vm/code/nmethod.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/code/nmethod.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1539,7 +1539,7 @@ if (PrintMethodFlushing) { tty->print_cr("*flushing nmethod %3d/" INTPTR_FORMAT ". 
Live blobs:" UINT32_FORMAT "/Free CodeCache:" SIZE_FORMAT "Kb", - _compile_id, p2i(this), CodeCache::nof_blobs(), + _compile_id, p2i(this), CodeCache::blob_count(), CodeCache::unallocated_capacity(CodeCache::get_code_blob_type(this))/1024); } @@ -1819,9 +1819,7 @@ if (_jvmci_installed_code != NULL) { if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) { if (!is_alive->do_object_b(_jvmci_installed_code)) { - bs->write_ref_nmethod_pre(&_jvmci_installed_code, this); - _jvmci_installed_code = NULL; - bs->write_ref_nmethod_post(&_jvmci_installed_code, this); + clear_jvmci_installed_code(); } } else { if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) { @@ -1922,27 +1920,6 @@ unloading_occurred = true; } -#if INCLUDE_JVMCI - // Follow JVMCI method - if (_jvmci_installed_code != NULL) { - if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) { - if (!is_alive->do_object_b(_jvmci_installed_code)) { - _jvmci_installed_code = NULL; - } - } else { - if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) { - return false; - } - } - } - - if (_speculation_log != NULL) { - if (!is_alive->do_object_b(_speculation_log)) { - _speculation_log = NULL; - } - } -#endif - // Exception cache clean_exception_cache(is_alive); @@ -2006,9 +1983,7 @@ if (_jvmci_installed_code != NULL) { if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) { if (!is_alive->do_object_b(_jvmci_installed_code)) { - bs->write_ref_nmethod_pre(&_jvmci_installed_code, this); - _jvmci_installed_code = NULL; - bs->write_ref_nmethod_post(&_jvmci_installed_code, this); + clear_jvmci_installed_code(); } } else { if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) { @@ -2271,7 +2246,7 @@ break; } // Mark was clear when we first saw this guy. 
- NOT_PRODUCT(if (TraceScavenge) print_on(tty, "oops_do, mark")); + if (TraceScavenge) { print_on(tty, "oops_do, mark"); } return false; } } @@ -2280,7 +2255,7 @@ } void nmethod::oops_do_marking_prologue() { - NOT_PRODUCT(if (TraceScavenge) tty->print_cr("[oops_do_marking_prologue")); + if (TraceScavenge) { tty->print_cr("[oops_do_marking_prologue"); } assert(_oops_do_mark_nmethods == NULL, "must not call oops_do_marking_prologue twice in a row"); // We use cmpxchg_ptr instead of regular assignment here because the user // may fork a bunch of threads, and we need them all to see the same state. @@ -2302,7 +2277,7 @@ void* required = _oops_do_mark_nmethods; void* observed = Atomic::cmpxchg_ptr(NULL, &_oops_do_mark_nmethods, required); guarantee(observed == required, "no races in this sequential code"); - NOT_PRODUCT(if (TraceScavenge) tty->print_cr("oops_do_marking_epilogue]")); + if (TraceScavenge) { tty->print_cr("oops_do_marking_epilogue]"); } } class DetectScavengeRoot: public OopClosure { @@ -3373,6 +3348,14 @@ #endif // !PRODUCT #if INCLUDE_JVMCI +void nmethod::clear_jvmci_installed_code() { + // This must be done carefully to maintain nmethod remembered sets properly + BarrierSet* bs = Universe::heap()->barrier_set(); + bs->write_ref_nmethod_pre(&_jvmci_installed_code, this); + _jvmci_installed_code = NULL; + bs->write_ref_nmethod_post(&_jvmci_installed_code, this); +} + void nmethod::maybe_invalidate_installed_code() { if (_jvmci_installed_code != NULL) { if (!is_alive()) { @@ -3382,7 +3365,7 @@ // might want to invalidate all existing activations. 
InstalledCode::set_address(_jvmci_installed_code, 0); InstalledCode::set_entryPoint(_jvmci_installed_code, 0); - _jvmci_installed_code = NULL; + clear_jvmci_installed_code(); } else if (is_not_entrant()) { InstalledCode::set_entryPoint(_jvmci_installed_code, 0); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/code/nmethod.hpp --- a/hotspot/src/share/vm/code/nmethod.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/code/nmethod.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -602,7 +602,7 @@ #if INCLUDE_JVMCI oop jvmci_installed_code() { return _jvmci_installed_code ; } char* jvmci_installed_code_name(char* buf, size_t buflen); - void set_jvmci_installed_code(oop installed_code) { _jvmci_installed_code = installed_code; } + void clear_jvmci_installed_code(); void maybe_invalidate_installed_code(); oop speculation_log() { return _speculation_log ; } void set_speculation_log(oop speculation_log) { _speculation_log = speculation_log; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/compiler/compileBroker.cpp --- a/hotspot/src/share/vm/compiler/compileBroker.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/compiler/compileBroker.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -26,6 +26,7 @@ #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" +#include "code/dependencyContext.hpp" #include "compiler/compileBroker.hpp" #include "compiler/compileLog.hpp" #include "compiler/compilerOracle.hpp" @@ -237,10 +238,27 @@ task->set_code_handle(NULL); thread->set_env(NULL); if (task->is_blocking()) { - MutexLocker notifier(task->lock(), thread); - task->mark_complete(); - // Notify the waiting thread that the compilation has completed. 
- task->lock()->notify_all(); + bool free_task = false; + { + MutexLocker notifier(task->lock(), thread); + task->mark_complete(); +#if INCLUDE_JVMCI + if (CompileBroker::compiler(task->comp_level())->is_jvmci() && + !task->has_waiter()) { + // The waiting thread timed out and thus did not free the task. + free_task = true; + } +#endif + if (!free_task) { + // Notify the waiting thread that the compilation has completed + // so that it can free the task. + task->lock()->notify_all(); + } + } + if (free_task) { + // The task can only be freed once the task lock is released. + CompileTask::free(task); + } } else { task->mark_complete(); @@ -547,7 +565,6 @@ PerfData::U_Ticks, CHECK); } - if (UsePerfData) { EXCEPTION_MARK; @@ -1302,6 +1319,11 @@ return new_task; } +// 1 second should be long enough to complete most JVMCI compilations +// and not too long to stall a blocking JVMCI compilation that +// is trying to acquire a lock held by the app thread that submitted the +// compilation. +static const long BLOCKING_JVMCI_COMPILATION_TIMEOUT = 1000; /** * Wait for the compilation task to complete. @@ -1318,30 +1340,47 @@ thread->set_blocked_on_compilation(true); methodHandle method(thread, task->method()); + bool free_task; +#if INCLUDE_JVMCI + if (compiler(task->comp_level())->is_jvmci()) { + MutexLocker waiter(task->lock(), thread); + // No need to check if compilation has completed - just + // rely on the time out. The JVMCI compiler thread will + // recycle the CompileTask. + task->lock()->wait(!Mutex::_no_safepoint_check_flag, BLOCKING_JVMCI_COMPILATION_TIMEOUT); + // If the compilation completes while has_waiter is true then + // this thread is responsible for freeing the task. Otherwise + // the compiler thread will free the task. 
+ task->clear_waiter(); + free_task = task->is_complete(); + } else +#endif { MutexLocker waiter(task->lock(), thread); - + free_task = true; while (!task->is_complete() && !is_compilation_disabled_forever()) { task->lock()->wait(); } } thread->set_blocked_on_compilation(false); - if (is_compilation_disabled_forever()) { - CompileTask::free(task); - return; - } + if (free_task) { + if (is_compilation_disabled_forever()) { + CompileTask::free(task); + return; + } - // It is harmless to check this status without the lock, because - // completion is a stable property (until the task object is recycled). - assert(task->is_complete(), "Compilation should have completed"); - assert(task->code_handle() == NULL, "must be reset"); + // It is harmless to check this status without the lock, because + // completion is a stable property (until the task object is recycled). + assert(task->is_complete(), "Compilation should have completed"); + assert(task->code_handle() == NULL, "must be reset"); - // By convention, the waiter is responsible for recycling a - // blocking CompileTask. Since there is only one waiter ever - // waiting on a CompileTask, we know that no one else will - // be using this CompileTask; we can free it. - CompileTask::free(task); + // By convention, the waiter is responsible for recycling a + // blocking CompileTask. Since there is only one waiter ever + // waiting on a CompileTask, we know that no one else will + // be using this CompileTask; we can free it. 
+ CompileTask::free(task); + } } /** @@ -1676,13 +1715,7 @@ bool should_break = false; int task_level = task->comp_level(); - // Look up matching directives - DirectiveSet* directive = DirectivesStack::getMatchingDirective(task->method(), compiler(task_level)); - - should_break = directive->BreakAtExecuteOption || task->check_break_at_flags(); - if (should_log && !directive->LogOption) { - should_log = false; - } + DirectiveSet* directive; { // create the handle inside it's own block so it can't // accidentally be referenced once the thread transitions to @@ -1691,12 +1724,20 @@ methodHandle method(thread, task->method()); assert(!method->is_native(), "no longer compile natives"); + // Look up matching directives + directive = DirectivesStack::getMatchingDirective(method, compiler(task_level)); + // Save information about this method in case of failure. set_last_compile(thread, method, is_osr, task_level); DTRACE_METHOD_COMPILE_BEGIN_PROBE(method, compiler_name(task_level)); } + should_break = directive->BreakAtExecuteOption || task->check_break_at_flags(); + if (should_log && !directive->LogOption) { + should_log = false; + } + // Allocate a new set of JNI handles. 
push_jni_handle_block(); Method* target_handle = task->method(); @@ -1716,7 +1757,8 @@ EventCompilation event; JVMCIEnv env(task, system_dictionary_modification_counter); - jvmci->compile_method(target_handle, osr_bci, &env); + methodHandle method(thread, target_handle); + jvmci->compile_method(method, osr_bci, &env); post_compile(thread, task, event, task->code() != NULL, NULL); } else diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/compiler/compileTask.cpp --- a/hotspot/src/share/vm/compiler/compileTask.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/compiler/compileTask.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -47,7 +47,7 @@ } else { task = new CompileTask(); DEBUG_ONLY(_num_allocated_tasks++;) - assert (WhiteBoxAPI || _num_allocated_tasks < 10000, "Leaking compilation tasks?"); + assert (WhiteBoxAPI || JVMCI_ONLY(UseJVMCICompiler ||) _num_allocated_tasks < 10000, "Leaking compilation tasks?"); task->set_next(NULL); task->set_is_free(true); } @@ -90,6 +90,7 @@ _method_holder = JNIHandles::make_global(method->method_holder()->klass_holder()); _osr_bci = osr_bci; _is_blocking = is_blocking; + JVMCI_ONLY(_has_waiter = CompileBroker::compiler(comp_level)->is_jvmci();) _comp_level = comp_level; _num_inlined_bytecodes = 0; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/compiler/compileTask.hpp --- a/hotspot/src/share/vm/compiler/compileTask.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/compiler/compileTask.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -53,6 +53,9 @@ bool _is_complete; bool _is_success; bool _is_blocking; +#if INCLUDE_JVMCI + bool _has_waiter; +#endif int _comp_level; int _num_inlined_bytecodes; nmethodLocker* _code_handle; // holder of eventual result @@ -85,6 +88,10 @@ bool is_complete() const { return _is_complete; } bool is_blocking() const { return _is_blocking; } bool is_success() const { return _is_success; } +#if INCLUDE_JVMCI + bool has_waiter() const { return _has_waiter; } + void clear_waiter() { 
_has_waiter = false; } +#endif nmethodLocker* code_handle() const { return _code_handle; } void set_code_handle(nmethodLocker* l) { _code_handle = l; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/compiler/compilerDirectives.cpp --- a/hotspot/src/share/vm/compiler/compilerDirectives.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/compiler/compilerDirectives.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -527,12 +527,14 @@ DirectiveSet* DirectivesStack::getMatchingDirective(methodHandle method, AbstractCompiler *comp) { assert(_depth > 0, "Must never be empty"); - CompilerDirectives* dir = _top; - assert(dir != NULL, "Must be initialized"); DirectiveSet* match = NULL; { MutexLockerEx locker(DirectivesStack_lock, Mutex::_no_safepoint_check_flag); + + CompilerDirectives* dir = _top; + assert(dir != NULL, "Must be initialized"); + while (dir != NULL) { if (dir->is_default_directive() || dir->match(method)) { match = dir->get_for(comp); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/compiler/compilerDirectives.hpp --- a/hotspot/src/share/vm/compiler/compilerDirectives.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/compiler/compilerDirectives.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -67,7 +67,7 @@ cflags(VectorizeDebug, bool, false, VectorizeDebug) \ cflags(CloneMapDebug, bool, false, CloneMapDebug) \ cflags(DoReserveCopyInSuperWordDebug, bool, false, DoReserveCopyInSuperWordDebug) \ - NOT_PRODUCT( cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel)) \ + cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \ cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) #else #define compilerdirectives_c2_flags(cflags) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/cms/parNewGeneration.cpp --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1148,7 +1148,6 @@ } assert(new_obj != 
NULL, "just checking"); -#ifndef PRODUCT // This code must come after the CAS test, or it will print incorrect // information. if (TraceScavenge) { @@ -1156,7 +1155,6 @@ is_in_reserved(new_obj) ? "copying" : "tenuring", new_obj->klass()->internal_name(), p2i(old), p2i(new_obj), new_obj->size()); } -#endif if (forward_ptr == NULL) { oop obj_to_push = new_obj; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp --- a/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -108,14 +108,11 @@ if (m->is_marked()) { // Contains forwarding pointer. new_obj = ParNewGeneration::real_forwardee(obj); oopDesc::encode_store_heap_oop_not_null(p, new_obj); -#ifndef PRODUCT if (TraceScavenge) { gclog_or_tty->print_cr("{%s %s ( " PTR_FORMAT " ) " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", "forwarded ", new_obj->klass()->internal_name(), p2i(p), p2i((void *)obj), p2i((void *)new_obj), new_obj->size()); } -#endif - } else { size_t obj_sz = obj->size_given_klass(objK); new_obj = _g->copy_to_survivor_space(_par_scan_state, obj, obj_sz, m); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp --- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -430,7 +430,6 @@ obj = obj->forwardee(); } -#ifndef PRODUCT if (TraceScavenge) { gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " (%d)}", "promotion-failure", @@ -438,7 +437,6 @@ p2i(obj), obj->size()); } -#endif return obj; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp --- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp Tue Nov 24 10:30:23 2015 +0100 
@@ -260,7 +260,6 @@ new_obj = o->forwardee(); } -#ifndef PRODUCT // This code must come after the CAS test, or it will print incorrect // information. if (TraceScavenge) { @@ -268,7 +267,6 @@ should_scavenge(&new_obj) ? "copying" : "tenuring", new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); } -#endif return new_obj; } @@ -285,15 +283,13 @@ ? o->forwardee() : copy_to_survivor_space(o); -#ifndef PRODUCT // This code must come after the CAS test, or it will print incorrect // information. - if (TraceScavenge && o->is_forwarded()) { + if (TraceScavenge && o->is_forwarded()) { gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", "forwarding", new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); } -#endif oopDesc::encode_store_heap_oop_not_null(p, new_obj); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp --- a/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -138,7 +138,6 @@ // If the klass has not been dirtied we know that there's // no references into the young gen and we can skip it. -#ifndef PRODUCT if (TraceScavenge) { ResourceMark rm; gclog_or_tty->print_cr("PSScavengeKlassClosure::do_klass " PTR_FORMAT ", %s, dirty: %s", @@ -146,7 +145,6 @@ klass->external_name(), klass->has_modified_oops() ? "true" : "false"); } -#endif if (klass->has_modified_oops()) { // Clean the klass since we're going to scavenge all the metadata. 
diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/gc/serial/defNewGeneration.cpp --- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -134,7 +134,6 @@ void FastScanClosure::do_oop(narrowOop* p) { FastScanClosure::do_oop_work(p); } void KlassScanClosure::do_klass(Klass* klass) { -#ifndef PRODUCT if (TraceScavenge) { ResourceMark rm; gclog_or_tty->print_cr("KlassScanClosure::do_klass " PTR_FORMAT ", %s, dirty: %s", @@ -142,7 +141,6 @@ klass->external_name(), klass->has_modified_oops() ? "true" : "false"); } -#endif // If the klass has not been dirtied we know that there's // no references into the young gen and we can skip it. diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/interpreter/interpreter.cpp --- a/hotspot/src/share/vm/interpreter/interpreter.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/interpreter/interpreter.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -300,7 +300,10 @@ } // Accessor method? - if (m->is_accessor()) { + if (m->is_getter()) { + // TODO: We should have used ::is_accessor above, but fast accessors in Zero expect only getters. + // See CppInterpreter::accessor_entry in cppInterpreter_zero.cpp. 
This should be fixed in Zero, + // then the call above updated to ::is_accessor assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1"); return accessor; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp --- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -71,62 +71,97 @@ return CompilerToVM::asMethod(hotspot_method); } -VMReg getVMRegFromLocation(oop location, int total_frame_size) { - oop reg = code_Location::reg(location); +VMReg getVMRegFromLocation(Handle location, int total_frame_size, TRAPS) { + if (location.is_null()) { + THROW_NULL(vmSymbols::java_lang_NullPointerException()); + } + + Handle reg = code_Location::reg(location); jint offset = code_Location::offset(location); - if (reg != NULL) { + if (reg.not_null()) { // register jint number = code_Register::number(reg); - VMReg vmReg = CodeInstaller::get_hotspot_reg(number); - assert(offset % 4 == 0, "must be aligned"); - return vmReg->next(offset / 4); + VMReg vmReg = CodeInstaller::get_hotspot_reg(number, CHECK_NULL); + if (offset % 4 == 0) { + return vmReg->next(offset / 4); + } else { + JVMCI_ERROR_NULL("unaligned subregister offset %d in oop map", offset); + } } else { // stack slot - assert(offset % 4 == 0, "must be aligned"); - return VMRegImpl::stack2reg(offset / 4); + if (offset % 4 == 0) { + return VMRegImpl::stack2reg(offset / 4); + } else { + JVMCI_ERROR_NULL("unaligned stack offset %d in oop map", offset); + } } } // creates a HotSpot oop map out of the byte arrays provided by DebugInfo -OopMap* CodeInstaller::create_oop_map(oop debug_info) { - oop reference_map = DebugInfo::referenceMap(debug_info); +OopMap* CodeInstaller::create_oop_map(Handle debug_info, TRAPS) { + Handle reference_map = DebugInfo::referenceMap(debug_info); + if (reference_map.is_null()) { + 
THROW_NULL(vmSymbols::java_lang_NullPointerException()); + } + if (!reference_map->is_a(HotSpotReferenceMap::klass())) { + JVMCI_ERROR_NULL("unknown reference map: %s", reference_map->klass()->signature_name()); + } if (HotSpotReferenceMap::maxRegisterSize(reference_map) > 16) { _has_wide_vector = true; } OopMap* map = new OopMap(_total_frame_size, _parameter_count); - objArrayOop objects = HotSpotReferenceMap::objects(reference_map); - objArrayOop derivedBase = HotSpotReferenceMap::derivedBase(reference_map); - typeArrayOop sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map); + objArrayHandle objects = HotSpotReferenceMap::objects(reference_map); + objArrayHandle derivedBase = HotSpotReferenceMap::derivedBase(reference_map); + typeArrayHandle sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map); + if (objects.is_null() || derivedBase.is_null() || sizeInBytes.is_null()) { + THROW_NULL(vmSymbols::java_lang_NullPointerException()); + } + if (objects->length() != derivedBase->length() || objects->length() != sizeInBytes->length()) { + JVMCI_ERROR_NULL("arrays in reference map have different sizes: %d %d %d", objects->length(), derivedBase->length(), sizeInBytes->length()); + } for (int i = 0; i < objects->length(); i++) { - oop location = objects->obj_at(i); - oop baseLocation = derivedBase->obj_at(i); + Handle location = objects->obj_at(i); + Handle baseLocation = derivedBase->obj_at(i); int bytes = sizeInBytes->int_at(i); - VMReg vmReg = getVMRegFromLocation(location, _total_frame_size); - if (baseLocation != NULL) { + VMReg vmReg = getVMRegFromLocation(location, _total_frame_size, CHECK_NULL); + if (baseLocation.not_null()) { // derived oop - assert(bytes == 8, "derived oop can't be compressed"); - VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size); - map->set_derived_oop(vmReg, baseReg); +#ifdef _LP64 + if (bytes == 8) { +#else + if (bytes == 4) { +#endif + VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size, 
CHECK_NULL); + map->set_derived_oop(vmReg, baseReg); + } else { + JVMCI_ERROR_NULL("invalid derived oop size in ReferenceMap: %d", bytes); + } +#ifdef _LP64 } else if (bytes == 8) { // wide oop map->set_oop(vmReg); - } else { + } else if (bytes == 4) { // narrow oop - assert(bytes == 4, "wrong size"); map->set_narrowoop(vmReg); +#else + } else if (bytes == 4) { + map->set_oop(vmReg); +#endif + } else { + JVMCI_ERROR_NULL("invalid oop size in ReferenceMap: %d", bytes); } } - oop callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info); - if (callee_save_info != NULL) { - objArrayOop registers = RegisterSaveLayout::registers(callee_save_info); - typeArrayOop slots = RegisterSaveLayout::slots(callee_save_info); + Handle callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info); + if (callee_save_info.not_null()) { + objArrayHandle registers = RegisterSaveLayout::registers(callee_save_info); + typeArrayHandle slots = RegisterSaveLayout::slots(callee_save_info); for (jint i = 0; i < slots->length(); i++) { - oop jvmci_reg = registers->obj_at(i); + Handle jvmci_reg = registers->obj_at(i); jint jvmci_reg_number = code_Register::number(jvmci_reg); - VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number); + VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number, CHECK_NULL); // HotSpot stack slots are 4 bytes jint jvmci_slot = slots->int_at(i); jint hotspot_slot = jvmci_slot * VMRegImpl::slots_per_word; @@ -142,7 +177,7 @@ return map; } -Metadata* CodeInstaller::record_metadata_reference(Handle& constant) { +Metadata* CodeInstaller::record_metadata_reference(Handle constant, TRAPS) { oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant); if (obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) { Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj)); @@ -157,16 +192,18 @@ TRACE_jvmci_3("metadata[%d of %d] = %s", index, _oop_recorder->metadata_count(), method->name()->as_C_string()); return 
method; } else { - fatal("unexpected metadata reference for constant of type %s", obj->klass()->name()->as_C_string()); - return NULL; + JVMCI_ERROR_NULL("unexpected metadata reference for constant of type %s", obj->klass()->signature_name()); } } #ifdef _LP64 -narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle& constant) { +narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle constant, TRAPS) { oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant); assert(HotSpotMetaspaceConstantImpl::compressed(constant), "unexpected uncompressed pointer"); - assert(obj->is_a(HotSpotResolvedObjectTypeImpl::klass()), "unexpected compressed pointer of type %s", obj->klass()->name()->as_C_string()); + + if (!obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) { + JVMCI_ERROR_0("unexpected compressed pointer of type %s", obj->klass()->signature_name()); + } Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj)); int index = _oop_recorder->find_index(klass); @@ -175,9 +212,9 @@ } #endif -Location::Type CodeInstaller::get_oop_type(oop value) { - oop lirKind = Value::lirKind(value); - oop platformKind = LIRKind::platformKind(lirKind); +Location::Type CodeInstaller::get_oop_type(Handle value) { + Handle lirKind = Value::lirKind(value); + Handle platformKind = LIRKind::platformKind(lirKind); assert(LIRKind::referenceMask(lirKind) == 1, "unexpected referenceMask"); if (platformKind == word_kind()) { @@ -187,24 +224,29 @@ } } -ScopeValue* CodeInstaller::get_scope_value(oop value, BasicType type, GrowableArray* objects, ScopeValue* &second) { +ScopeValue* CodeInstaller::get_scope_value(Handle value, BasicType type, GrowableArray* objects, ScopeValue* &second, TRAPS) { second = NULL; - if (value == Value::ILLEGAL()) { - assert(type == T_ILLEGAL, "expected legal value"); + if (value.is_null()) { + THROW_NULL(vmSymbols::java_lang_NullPointerException()); + } else if (value == Value::ILLEGAL()) { + if (type != T_ILLEGAL) { 
+ JVMCI_ERROR_NULL("unexpected illegal value, expected %s", basictype_to_str(type)); + } return _illegal_value; } else if (value->is_a(RegisterValue::klass())) { - oop reg = RegisterValue::reg(value); + Handle reg = RegisterValue::reg(value); jint number = code_Register::number(reg); - VMReg hotspotRegister = get_hotspot_reg(number); + VMReg hotspotRegister = get_hotspot_reg(number, CHECK_NULL); if (is_general_purpose_reg(hotspotRegister)) { Location::Type locationType; if (type == T_OBJECT) { locationType = get_oop_type(value); } else if (type == T_LONG) { locationType = Location::lng; + } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN) { + locationType = Location::int_in_long; } else { - assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in cpu register"); - locationType = Location::int_in_long; + JVMCI_ERROR_NULL("unexpected type %s in cpu register", basictype_to_str(type)); } ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister)); if (type == T_LONG) { @@ -212,13 +254,14 @@ } return value; } else { - assert(type == T_FLOAT || type == T_DOUBLE, "only float and double expected in xmm register"); Location::Type locationType; if (type == T_FLOAT) { // this seems weird, but the same value is used in c1_LinearScan locationType = Location::normal; + } else if (type == T_DOUBLE) { + locationType = Location::dbl; } else { - locationType = Location::dbl; + JVMCI_ERROR_NULL("unexpected type %s in floating point register", basictype_to_str(type)); } ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister)); if (type == T_DOUBLE) { @@ -239,9 +282,10 @@ locationType = Location::lng; } else if (type == T_DOUBLE) { locationType = Location::dbl; + } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == 
T_BOOLEAN) { + locationType = Location::normal; } else { - assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in stack slot"); - locationType = Location::normal; + JVMCI_ERROR_NULL("unexpected type %s in stack slot", basictype_to_str(type)); } ScopeValue* value = new LocationValue(Location::new_stk_loc(locationType, offset)); if (type == T_DOUBLE || type == T_LONG) { @@ -254,7 +298,10 @@ jlong prim = PrimitiveConstant::primitive(value); return new ConstantLongValue(prim); } else { - assert(type == JVMCIRuntime::kindToBasicType(JavaKind::typeChar(PrimitiveConstant::kind(value))), "primitive constant type doesn't match"); + BasicType constantType = JVMCIRuntime::kindToBasicType(PrimitiveConstant::kind(value), CHECK_NULL); + if (type != constantType) { + JVMCI_ERROR_NULL("primitive constant type doesn't match, expected %s but got %s", basictype_to_str(type), basictype_to_str(constantType)); + } if (type == T_INT || type == T_FLOAT) { jint prim = (jint)PrimitiveConstant::primitive(value); switch (prim) { @@ -264,53 +311,63 @@ case 2: return _int_2_scope_value; default: return new ConstantIntValue(prim); } - } else { - assert(type == T_LONG || type == T_DOUBLE, "unexpected primitive constant type"); + } else if (type == T_LONG || type == T_DOUBLE) { jlong prim = PrimitiveConstant::primitive(value); second = _int_1_scope_value; return new ConstantLongValue(prim); + } else { + JVMCI_ERROR_NULL("unexpected primitive constant type %s", basictype_to_str(type)); } } - } else { - assert(type == T_OBJECT, "unexpected object constant"); - if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) { + } else if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) { + if (type == T_OBJECT) { return _oop_null_scope_value; } else { - assert(value->is_a(HotSpotObjectConstantImpl::klass()), "unexpected constant type"); + 
JVMCI_ERROR_NULL("unexpected null constant, expected %s", basictype_to_str(type)); + } + } else if (value->is_a(HotSpotObjectConstantImpl::klass())) { + if (type == T_OBJECT) { oop obj = HotSpotObjectConstantImpl::object(value); - assert(obj != NULL, "null value must be in NullConstant"); + if (obj == NULL) { + JVMCI_ERROR_NULL("null value must be in NullConstant"); + } return new ConstantOopWriteValue(JNIHandles::make_local(obj)); + } else { + JVMCI_ERROR_NULL("unexpected object constant, expected %s", basictype_to_str(type)); } } } else if (value->is_a(VirtualObject::klass())) { - assert(type == T_OBJECT, "unexpected virtual object"); - int id = VirtualObject::id(value); - ScopeValue* object = objects->at(id); - assert(object != NULL, "missing value"); - return object; - } else { - value->klass()->print(); - value->print(); + if (type == T_OBJECT) { + int id = VirtualObject::id(value); + if (0 <= id && id < objects->length()) { + ScopeValue* object = objects->at(id); + if (object != NULL) { + return object; + } + } + JVMCI_ERROR_NULL("unknown virtual object id %d", id); + } else { + JVMCI_ERROR_NULL("unexpected virtual object, expected %s", basictype_to_str(type)); + } } - ShouldNotReachHere(); - return NULL; + + JVMCI_ERROR_NULL("unexpected value in scope: %s", value->klass()->signature_name()) } -void CodeInstaller::record_object_value(ObjectValue* sv, oop value, GrowableArray* objects) { - oop type = VirtualObject::type(value); +void CodeInstaller::record_object_value(ObjectValue* sv, Handle value, GrowableArray* objects, TRAPS) { + Handle type = VirtualObject::type(value); int id = VirtualObject::id(value); oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type); Klass* klass = java_lang_Class::as_Klass(javaMirror); bool isLongArray = klass == Universe::longArrayKlassObj(); - objArrayOop values = VirtualObject::values(value); - objArrayOop slotKinds = VirtualObject::slotKinds(value); + objArrayHandle values = VirtualObject::values(value); + 
objArrayHandle slotKinds = VirtualObject::slotKinds(value); for (jint i = 0; i < values->length(); i++) { ScopeValue* cur_second = NULL; - oop object = values->obj_at(i); - oop kind = slotKinds->obj_at(i); - BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind)); - ScopeValue* value = get_scope_value(object, type, objects, cur_second); + Handle object = values->obj_at(i); + BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK); + ScopeValue* value = get_scope_value(object, type, objects, cur_second, CHECK); if (isLongArray && cur_second == NULL) { // we're trying to put ints into a long array... this isn't really valid, but it's used for some optimizations. @@ -326,14 +383,19 @@ } } -MonitorValue* CodeInstaller::get_monitor_value(oop value, GrowableArray* objects) { - guarantee(value->is_a(StackLockValue::klass()), "Monitors must be of type StackLockValue"); +MonitorValue* CodeInstaller::get_monitor_value(Handle value, GrowableArray* objects, TRAPS) { + if (value.is_null()) { + THROW_NULL(vmSymbols::java_lang_NullPointerException()); + } + if (!value->is_a(StackLockValue::klass())) { + JVMCI_ERROR_NULL("Monitors must be of type StackLockValue, got %s", value->klass()->signature_name()); + } ScopeValue* second = NULL; - ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second); + ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second, CHECK_NULL); assert(second == NULL, "monitor cannot occupy two stack slots"); - ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second); + ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second, CHECK_NULL); assert(second == lock_data_value, "monitor is LONG value that occupies two stack slots"); assert(lock_data_value->is_location(), "invalid monitor location"); Location lock_data_loc = 
((LocationValue*)lock_data_value)->location(); @@ -346,7 +408,7 @@ return new MonitorValue(owner_value, lock_data_loc, eliminated); } -void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder) { +void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder, TRAPS) { JavaThread* thread = JavaThread::current(); CompilerThread* compilerThread = thread->is_Compiler_thread() ? thread->as_CompilerThread() : NULL; _oop_recorder = recorder; @@ -368,8 +430,7 @@ } else if (assumption->klass() == Assumptions_CallSiteTargetValue::klass()) { assumption_CallSiteTargetValue(assumption); } else { - assumption->print(); - fatal("unexpected Assumption subclass"); + JVMCI_ERROR("unexpected Assumption subclass %s", assumption->klass()->signature_name()); } } } @@ -414,18 +475,19 @@ _size = bytes; } -JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata) { +JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS) { CodeBuffer buffer("JVMCI Compiler CodeBuffer for Metadata"); jobject compiled_code_obj = JNIHandles::make_local(compiled_code()); - initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL); + initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL, CHECK_OK); // Get instructions and constants CodeSections early because we need it. 
_instructions = buffer.insts(); _constants = buffer.consts(); - initialize_fields(target(), JNIHandles::resolve(compiled_code_obj)); - if (!initialize_buffer(buffer)) { - return JVMCIEnv::code_too_large; + initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK); + JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK); + if (result != JVMCIEnv::ok) { + return result; } process_exception_handlers(); @@ -446,18 +508,18 @@ } // constructor used to create a method -JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log) { +JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS) { CodeBuffer buffer("JVMCI Compiler CodeBuffer"); jobject compiled_code_obj = JNIHandles::make_local(compiled_code()); OopRecorder* recorder = new OopRecorder(&_arena, true); - initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder); + initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder, CHECK_OK); // Get instructions and constants CodeSections early because we need it. 
_instructions = buffer.insts(); _constants = buffer.consts(); - initialize_fields(target(), JNIHandles::resolve(compiled_code_obj)); - JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer); + initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK); + JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK); if (result != JVMCIEnv::ok) { return result; } @@ -500,7 +562,7 @@ return result; } -void CodeInstaller::initialize_fields(oop target, oop compiled_code) { +void CodeInstaller::initialize_fields(oop target, oop compiled_code, TRAPS) { if (compiled_code->is_a(HotSpotCompiledNmethod::klass())) { Handle hotspotJavaMethod = HotSpotCompiledNmethod::method(compiled_code); methodHandle method = getMethodFromHotSpotMethod(hotspotJavaMethod()); @@ -521,7 +583,9 @@ // Pre-calculate the constants section size. This is required for PC-relative addressing. _data_section_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSection(compiled_code)); - guarantee(HotSpotCompiledCode::dataSectionAlignment(compiled_code) <= _constants->alignment(), "Alignment inside constants section is restricted by alignment of section begin"); + if ((_constants->alignment() % HotSpotCompiledCode::dataSectionAlignment(compiled_code)) != 0) { + JVMCI_ERROR("invalid data section alignment: %d", HotSpotCompiledCode::dataSectionAlignment(compiled_code)); + } _constants_size = data_section()->length(); _data_section_patches_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSectionPatches(compiled_code)); @@ -538,16 +602,18 @@ _word_kind_handle = JNIHandles::make_local(Architecture::wordKind(arch)); } -int CodeInstaller::estimate_stubs_size() { +int CodeInstaller::estimate_stubs_size(TRAPS) { // Estimate the number of static call stubs that might be emitted. 
int static_call_stubs = 0; objArrayOop sites = this->sites(); for (int i = 0; i < sites->length(); i++) { oop site = sites->obj_at(i); - if (site->is_a(CompilationResult_Mark::klass())) { + if (site != NULL && site->is_a(CompilationResult_Mark::klass())) { oop id_obj = CompilationResult_Mark::id(site); if (id_obj != NULL) { - assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected"); + if (!java_lang_boxing_object::is_instance(id_obj, T_INT)) { + JVMCI_ERROR_0("expected Integer id, got %s", id_obj->klass()->signature_name()); + } jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT)); if (id == INVOKESTATIC || id == INVOKESPECIAL) { static_call_stubs++; @@ -559,7 +625,7 @@ } // perform data and call relocation on the CodeBuffer -JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer) { +JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer, TRAPS) { HandleMark hm; objArrayHandle sites = this->sites(); int locs_buffer_size = sites->length() * (relocInfo::length_limit + sizeof(relocInfo)); @@ -568,7 +634,7 @@ // stubs. Stubs have extra relocs but they are managed by the stub // section itself so they don't need to be accounted for in the // locs_buffer above. 
- int stubs_size = estimate_stubs_size(); + int stubs_size = estimate_stubs_size(CHECK_OK); int total_size = round_to(_code_size, buffer.insts()->alignment()) + round_to(_constants_size, buffer.consts()->alignment()) + round_to(stubs_size, buffer.stubs()->alignment()); if (total_size > JVMCINMethodSizeLimit) { @@ -600,19 +666,30 @@ for (int i = 0; i < data_section_patches()->length(); i++) { Handle patch = data_section_patches()->obj_at(i); + if (patch.is_null()) { + THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok); + } Handle reference = CompilationResult_DataPatch::reference(patch); - assert(reference->is_a(CompilationResult_ConstantReference::klass()), "patch in data section must be a ConstantReference"); + if (reference.is_null()) { + THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok); + } + if (!reference->is_a(CompilationResult_ConstantReference::klass())) { + JVMCI_ERROR_OK("invalid patch in data section: %s", reference->klass()->signature_name()); + } Handle constant = CompilationResult_ConstantReference::constant(reference); + if (constant.is_null()) { + THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok); + } address dest = _constants->start() + CompilationResult_Site::pcOffset(patch); if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) { if (HotSpotMetaspaceConstantImpl::compressed(constant)) { #ifdef _LP64 - *((narrowKlass*) dest) = record_narrow_metadata_reference(constant); + *((narrowKlass*) dest) = record_narrow_metadata_reference(constant, CHECK_OK); #else - fatal("unexpected compressed Klass* in 32-bit mode"); + JVMCI_ERROR_OK("unexpected compressed Klass* in 32-bit mode"); #endif } else { - *((Metadata**) dest) = record_metadata_reference(constant); + *((Metadata**) dest) = record_metadata_reference(constant, CHECK_OK); } } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) { Handle obj = HotSpotObjectConstantImpl::object(constant); @@ -623,48 +700,49 @@ #ifdef _LP64 
_constants->relocate(dest, oop_Relocation::spec(oop_index), relocInfo::narrow_oop_in_const); #else - fatal("unexpected compressed oop in 32-bit mode"); + JVMCI_ERROR_OK("unexpected compressed oop in 32-bit mode"); #endif } else { _constants->relocate(dest, oop_Relocation::spec(oop_index)); } } else { - ShouldNotReachHere(); + JVMCI_ERROR_OK("invalid constant in data section: %s", constant->klass()->signature_name()); } } jint last_pc_offset = -1; for (int i = 0; i < sites->length(); i++) { - { - No_Safepoint_Verifier no_safepoint; - oop site = sites->obj_at(i); - jint pc_offset = CompilationResult_Site::pcOffset(site); + Handle site = sites->obj_at(i); + if (site.is_null()) { + THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok); + } + + jint pc_offset = CompilationResult_Site::pcOffset(site); - if (site->is_a(CompilationResult_Call::klass())) { - TRACE_jvmci_4("call at %i", pc_offset); - site_Call(buffer, pc_offset, site); - } else if (site->is_a(CompilationResult_Infopoint::klass())) { - // three reasons for infopoints denote actual safepoints - oop reason = CompilationResult_Infopoint::reason(site); - if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) { - TRACE_jvmci_4("safepoint at %i", pc_offset); - site_Safepoint(buffer, pc_offset, site); - } else { - // if the infopoint is not an actual safepoint, it must have one of the other reasons - // (safeguard against new safepoint types that require handling above) - assert(InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason, ""); - site_Infopoint(buffer, pc_offset, site); - } - } else if (site->is_a(CompilationResult_DataPatch::klass())) { - TRACE_jvmci_4("datapatch at %i", pc_offset); - site_DataPatch(buffer, pc_offset, site); - } else if (site->is_a(CompilationResult_Mark::klass())) { - TRACE_jvmci_4("mark at %i", pc_offset); - site_Mark(buffer, 
pc_offset, site); - } else { - fatal("unexpected Site subclass"); - } - last_pc_offset = pc_offset; + if (site->is_a(CompilationResult_Call::klass())) { + TRACE_jvmci_4("call at %i", pc_offset); + site_Call(buffer, pc_offset, site, CHECK_OK); + } else if (site->is_a(CompilationResult_Infopoint::klass())) { + // three reasons for infopoints denote actual safepoints + oop reason = CompilationResult_Infopoint::reason(site); + if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) { + TRACE_jvmci_4("safepoint at %i", pc_offset); + site_Safepoint(buffer, pc_offset, site, CHECK_OK); + } else if (InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason) { + site_Infopoint(buffer, pc_offset, site, CHECK_OK); + } else { + JVMCI_ERROR_OK("unknown infopoint reason at %i", pc_offset); + } + } else if (site->is_a(CompilationResult_DataPatch::klass())) { + TRACE_jvmci_4("datapatch at %i", pc_offset); + site_DataPatch(buffer, pc_offset, site, CHECK_OK); + } else if (site->is_a(CompilationResult_Mark::klass())) { + TRACE_jvmci_4("mark at %i", pc_offset); + site_Mark(buffer, pc_offset, site, CHECK_OK); + } else { + JVMCI_ERROR_OK("unexpected site subclass: %s", site->klass()->signature_name()); } + last_pc_offset = pc_offset; + if (CodeInstallSafepointChecks && SafepointSynchronize::do_call_back()) { // this is a hacky way to force a safepoint check but nothing else was jumping out at me. 
ThreadToNativeFromVM ttnfv(JavaThread::current()); @@ -673,7 +751,6 @@ #ifndef PRODUCT if (comments() != NULL) { - No_Safepoint_Verifier no_safepoint; for (int i = 0; i < comments()->length(); i++) { oop comment = comments()->obj_at(i); assert(comment->is_a(HotSpotCompiledCode_Comment::klass()), "cce"); @@ -759,56 +836,61 @@ return true; } -GrowableArray* CodeInstaller::record_virtual_objects(oop debug_info) { - objArrayOop virtualObjects = DebugInfo::virtualObjectMapping(debug_info); - if (virtualObjects == NULL) { +GrowableArray* CodeInstaller::record_virtual_objects(Handle debug_info, TRAPS) { + objArrayHandle virtualObjects = DebugInfo::virtualObjectMapping(debug_info); + if (virtualObjects.is_null()) { return NULL; } GrowableArray* objects = new GrowableArray(virtualObjects->length(), virtualObjects->length(), NULL); // Create the unique ObjectValues for (int i = 0; i < virtualObjects->length(); i++) { - oop value = virtualObjects->obj_at(i); + Handle value = virtualObjects->obj_at(i); int id = VirtualObject::id(value); - oop type = VirtualObject::type(value); + Handle type = VirtualObject::type(value); oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type); ObjectValue* sv = new ObjectValue(id, new ConstantOopWriteValue(JNIHandles::make_local(Thread::current(), javaMirror))); - assert(objects->at(id) == NULL, "once"); + if (id < 0 || id >= objects->length()) { + JVMCI_ERROR_NULL("virtual object id %d out of bounds", id); + } + if (objects->at(id) != NULL) { + JVMCI_ERROR_NULL("duplicate virtual object id %d", id); + } objects->at_put(id, sv); } // All the values which could be referenced by the VirtualObjects // exist, so now describe all the VirtualObjects themselves. 
for (int i = 0; i < virtualObjects->length(); i++) { - oop value = virtualObjects->obj_at(i); + Handle value = virtualObjects->obj_at(i); int id = VirtualObject::id(value); - record_object_value(objects->at(id)->as_ObjectValue(), value, objects); + record_object_value(objects->at(id)->as_ObjectValue(), value, objects, CHECK_NULL); } _debug_recorder->dump_object_pool(objects); return objects; } -void CodeInstaller::record_scope(jint pc_offset, oop debug_info) { - oop position = DebugInfo::bytecodePosition(debug_info); - if (position == NULL) { +void CodeInstaller::record_scope(jint pc_offset, Handle debug_info, TRAPS) { + Handle position = DebugInfo::bytecodePosition(debug_info); + if (position.is_null()) { // Stubs do not record scope info, just oop maps return; } - GrowableArray* objectMapping = record_virtual_objects(debug_info); - record_scope(pc_offset, position, objectMapping); + GrowableArray* objectMapping = record_virtual_objects(debug_info, CHECK); + record_scope(pc_offset, position, objectMapping, CHECK); } -void CodeInstaller::record_scope(jint pc_offset, oop position, GrowableArray* objects) { - oop frame = NULL; +void CodeInstaller::record_scope(jint pc_offset, Handle position, GrowableArray* objects, TRAPS) { + Handle frame; if (position->is_a(BytecodeFrame::klass())) { frame = position; } - oop caller_frame = BytecodePosition::caller(position); - if (caller_frame != NULL) { - record_scope(pc_offset, caller_frame, objects); + Handle caller_frame = BytecodePosition::caller(position); + if (caller_frame.not_null()) { + record_scope(pc_offset, caller_frame, objects, CHECK); } - oop hotspot_method = BytecodePosition::method(position); - Method* method = getMethodFromHotSpotMethod(hotspot_method); + Handle hotspot_method = BytecodePosition::method(position); + Method* method = getMethodFromHotSpotMethod(hotspot_method()); jint bci = BytecodePosition::bci(position); if (bci == BytecodeFrame::BEFORE_BCI()) { bci = SynchronizationEntryBCI; @@ -817,13 +899,13 
@@ TRACE_jvmci_2("Recording scope pc_offset=%d bci=%d method=%s", pc_offset, bci, method->name_and_sig_as_C_string()); bool reexecute = false; - if (frame != NULL) { + if (frame.not_null()) { if (bci == SynchronizationEntryBCI){ reexecute = false; } else { Bytecodes::Code code = Bytecodes::java_code_at(method, method->bcp_from(bci)); reexecute = bytecode_should_reexecute(code); - if (frame != NULL) { + if (frame.not_null()) { reexecute = (BytecodeFrame::duringCall(frame) == JNI_FALSE); } } @@ -834,15 +916,22 @@ DebugToken* monitors_token = NULL; bool throw_exception = false; - if (frame != NULL) { + if (frame.not_null()) { jint local_count = BytecodeFrame::numLocals(frame); jint expression_count = BytecodeFrame::numStack(frame); jint monitor_count = BytecodeFrame::numLocks(frame); - objArrayOop values = BytecodeFrame::values(frame); - objArrayOop slotKinds = BytecodeFrame::slotKinds(frame); + objArrayHandle values = BytecodeFrame::values(frame); + objArrayHandle slotKinds = BytecodeFrame::slotKinds(frame); - assert(local_count + expression_count + monitor_count == values->length(), "unexpected values length"); - assert(local_count + expression_count == slotKinds->length(), "unexpected slotKinds length"); + if (values.is_null() || slotKinds.is_null()) { + THROW(vmSymbols::java_lang_NullPointerException()); + } + if (local_count + expression_count + monitor_count != values->length()) { + JVMCI_ERROR("unexpected values length %d in scope (%d locals, %d expressions, %d monitors)", values->length(), local_count, expression_count, monitor_count); + } + if (local_count + expression_count != slotKinds->length()) { + JVMCI_ERROR("unexpected slotKinds length %d in scope (%d locals, %d expressions)", slotKinds->length(), local_count, expression_count); + } GrowableArray* locals = local_count > 0 ? new GrowableArray (local_count) : NULL; GrowableArray* expressions = expression_count > 0 ? 
new GrowableArray (expression_count) : NULL; @@ -853,30 +942,30 @@ for (jint i = 0; i < values->length(); i++) { ScopeValue* second = NULL; - oop value = values->obj_at(i); + Handle value = values->obj_at(i); if (i < local_count) { - oop kind = slotKinds->obj_at(i); - BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind)); - ScopeValue* first = get_scope_value(value, type, objects, second); + BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK); + ScopeValue* first = get_scope_value(value, type, objects, second, CHECK); if (second != NULL) { locals->append(second); } locals->append(first); } else if (i < local_count + expression_count) { - oop kind = slotKinds->obj_at(i); - BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind)); - ScopeValue* first = get_scope_value(value, type, objects, second); + BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK); + ScopeValue* first = get_scope_value(value, type, objects, second, CHECK); if (second != NULL) { expressions->append(second); } expressions->append(first); } else { - monitors->append(get_monitor_value(value, objects)); + MonitorValue *monitor = get_monitor_value(value, objects, CHECK); + monitors->append(monitor); } if (second != NULL) { i++; - assert(i < values->length(), "double-slot value not followed by Value.ILLEGAL"); - assert(values->obj_at(i) == Value::ILLEGAL(), "double-slot value not followed by Value.ILLEGAL"); + if (i >= values->length() || values->obj_at(i) != Value::ILLEGAL()) { + JVMCI_ERROR("double-slot value not followed by Value.ILLEGAL"); + } } } @@ -891,32 +980,37 @@ locals_token, expressions_token, monitors_token); } -void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site) { - oop debug_info = CompilationResult_Infopoint::debugInfo(site); - assert(debug_info != NULL, "debug info expected"); +void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) { + 
Handle debug_info = CompilationResult_Infopoint::debugInfo(site); + if (debug_info.is_null()) { + JVMCI_ERROR("debug info expected at safepoint at %i", pc_offset); + } // address instruction = _instructions->start() + pc_offset; // jint next_pc_offset = Assembler::locate_next_instruction(instruction) - _instructions->start(); - _debug_recorder->add_safepoint(pc_offset, create_oop_map(debug_info)); - record_scope(pc_offset, debug_info); + OopMap *map = create_oop_map(debug_info, CHECK); + _debug_recorder->add_safepoint(pc_offset, map); + record_scope(pc_offset, debug_info, CHECK); _debug_recorder->end_safepoint(pc_offset); } -void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site) { - oop debug_info = CompilationResult_Infopoint::debugInfo(site); - assert(debug_info != NULL, "debug info expected"); +void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) { + Handle debug_info = CompilationResult_Infopoint::debugInfo(site); + if (debug_info.is_null()) { + JVMCI_ERROR("debug info expected at infopoint at %i", pc_offset); + } _debug_recorder->add_non_safepoint(pc_offset); - record_scope(pc_offset, debug_info); + record_scope(pc_offset, debug_info, CHECK); _debug_recorder->end_non_safepoint(pc_offset); } -void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, oop site) { - oop target = CompilationResult_Call::target(site); +void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) { + Handle target = CompilationResult_Call::target(site); InstanceKlass* target_klass = InstanceKlass::cast(target->klass()); - oop hotspot_method = NULL; // JavaMethod - oop foreign_call = NULL; + Handle hotspot_method; // JavaMethod + Handle foreign_call; if (target_klass->is_subclass_of(SystemDictionary::HotSpotForeignCallTarget_klass())) { foreign_call = target; @@ -924,27 +1018,29 @@ hotspot_method = target; } - oop debug_info = CompilationResult_Call::debugInfo(site); + Handle debug_info = 
CompilationResult_Call::debugInfo(site); - assert(!!hotspot_method ^ !!foreign_call, "Call site needs exactly one type"); + assert(hotspot_method.not_null() ^ foreign_call.not_null(), "Call site needs exactly one type"); NativeInstruction* inst = nativeInstruction_at(_instructions->start() + pc_offset); - jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method); + jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method, CHECK); - if (debug_info != NULL) { - _debug_recorder->add_safepoint(next_pc_offset, create_oop_map(debug_info)); - record_scope(next_pc_offset, debug_info); + if (debug_info.not_null()) { + OopMap *map = create_oop_map(debug_info, CHECK); + _debug_recorder->add_safepoint(next_pc_offset, map); + record_scope(next_pc_offset, debug_info, CHECK); } - if (foreign_call != NULL) { + if (foreign_call.not_null()) { jlong foreign_call_destination = HotSpotForeignCallTarget::address(foreign_call); - CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination); + CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination, CHECK); } else { // method != NULL - assert(hotspot_method != NULL, "unexpected JavaMethod"); - assert(debug_info != NULL, "debug info expected"); + if (debug_info.is_null()) { + JVMCI_ERROR("debug info expected at call at %i", pc_offset); + } TRACE_jvmci_3("method call"); - CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset); + CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset, CHECK); if (_next_call_type == INVOKESTATIC || _next_call_type == INVOKESPECIAL) { // Need a static call stub for transitions from compiled to interpreted. 
CompiledStaticCall::emit_to_interp_stub(buffer, _instructions->start() + pc_offset); @@ -953,38 +1049,45 @@ _next_call_type = INVOKE_INVALID; - if (debug_info != NULL) { + if (debug_info.not_null()) { _debug_recorder->end_safepoint(next_pc_offset); } } -void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site) { - oop reference = CompilationResult_DataPatch::reference(site); - if (reference->is_a(CompilationResult_ConstantReference::klass())) { +void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) { + Handle reference = CompilationResult_DataPatch::reference(site); + if (reference.is_null()) { + THROW(vmSymbols::java_lang_NullPointerException()); + } else if (reference->is_a(CompilationResult_ConstantReference::klass())) { Handle constant = CompilationResult_ConstantReference::constant(reference); - if (constant->is_a(HotSpotObjectConstantImpl::klass())) { - pd_patch_OopConstant(pc_offset, constant); + if (constant.is_null()) { + THROW(vmSymbols::java_lang_NullPointerException()); + } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) { + pd_patch_OopConstant(pc_offset, constant, CHECK); } else if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) { - pd_patch_MetaspaceConstant(pc_offset, constant); - } else if (constant->is_a(HotSpotSentinelConstant::klass())) { - fatal("sentinel constant unsupported"); + pd_patch_MetaspaceConstant(pc_offset, constant, CHECK); } else { - fatal("unknown constant type in data patch"); + JVMCI_ERROR("unknown constant type in data patch: %s", constant->klass()->signature_name()); } } else if (reference->is_a(CompilationResult_DataSectionReference::klass())) { int data_offset = CompilationResult_DataSectionReference::offset(reference); - assert(0 <= data_offset && data_offset < _constants_size, "data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size); - pd_patch_DataSectionReference(pc_offset, data_offset); + if (0 <= 
data_offset && data_offset < _constants_size) { + pd_patch_DataSectionReference(pc_offset, data_offset); + } else { + JVMCI_ERROR("data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size); + } } else { - fatal("unknown data patch type"); + JVMCI_ERROR("unknown data patch type: %s", reference->klass()->signature_name()); } } -void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, oop site) { - oop id_obj = CompilationResult_Mark::id(site); +void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) { + Handle id_obj = CompilationResult_Mark::id(site); - if (id_obj != NULL) { - assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected"); + if (id_obj.not_null()) { + if (!java_lang_boxing_object::is_instance(id_obj(), T_INT)) { + JVMCI_ERROR("expected Integer id, got %s", id_obj->klass()->signature_name()); + } jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT)); address pc = _instructions->start() + pc_offset; @@ -1017,7 +1120,7 @@ case POLL_FAR: case POLL_RETURN_NEAR: case POLL_RETURN_FAR: - pd_relocate_poll(pc, id); + pd_relocate_poll(pc, id, CHECK); break; case CARD_TABLE_SHIFT: case CARD_TABLE_ADDRESS: @@ -1027,7 +1130,7 @@ case CRC_TABLE_ADDRESS: break; default: - ShouldNotReachHere(); + JVMCI_ERROR("invalid mark id: %d", id); break; } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp --- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -154,13 +154,13 @@ static ConstantIntValue* _int_2_scope_value; static LocationValue* _illegal_value; - jint pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method); - void pd_patch_OopConstant(int pc_offset, Handle& constant); - void pd_patch_MetaspaceConstant(int pc_offset, Handle& constant); + jint pd_next_offset(NativeInstruction* 
inst, jint pc_offset, Handle method, TRAPS); + void pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS); + void pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS); void pd_patch_DataSectionReference(int pc_offset, int data_offset); - void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination); - void pd_relocate_JavaMethod(oop method, jint pc_offset); - void pd_relocate_poll(address pc, jint mark); + void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS); + void pd_relocate_JavaMethod(Handle method, jint pc_offset, TRAPS); + void pd_relocate_poll(address pc, jint mark, TRAPS); objArrayOop sites() { return (objArrayOop) JNIHandles::resolve(_sites_handle); } arrayOop code() { return (arrayOop) JNIHandles::resolve(_code_handle); } @@ -177,33 +177,33 @@ CodeInstaller() : _arena(mtCompiler) {} - JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata); - JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log); + JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS); + JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS); static address runtime_call_target_address(oop runtime_call); - static VMReg get_hotspot_reg(jint jvmciRegisterNumber); + static VMReg get_hotspot_reg(jint jvmciRegisterNumber, TRAPS); static bool is_general_purpose_reg(VMReg hotspotRegister); const OopMapSet* oopMapSet() const { return _debug_recorder->_oopmaps; } protected: - Location::Type get_oop_type(oop value); - ScopeValue* get_scope_value(oop value, BasicType type, GrowableArray* objects, ScopeValue* &second); - MonitorValue* get_monitor_value(oop value, GrowableArray* objects); + 
Location::Type get_oop_type(Handle value); + ScopeValue* get_scope_value(Handle value, BasicType type, GrowableArray* objects, ScopeValue* &second, TRAPS); + MonitorValue* get_monitor_value(Handle value, GrowableArray* objects, TRAPS); - Metadata* record_metadata_reference(Handle& constant); + Metadata* record_metadata_reference(Handle constant, TRAPS); #ifdef _LP64 - narrowKlass record_narrow_metadata_reference(Handle& constant); + narrowKlass record_narrow_metadata_reference(Handle constant, TRAPS); #endif // extract the fields of the CompilationResult - void initialize_fields(oop target, oop target_method); - void initialize_dependencies(oop target_method, OopRecorder* oop_recorder); + void initialize_fields(oop target, oop target_method, TRAPS); + void initialize_dependencies(oop target_method, OopRecorder* oop_recorder, TRAPS); - int estimate_stubs_size(); + int estimate_stubs_size(TRAPS); // perform data and call relocation on the CodeBuffer - JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer); + JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer, TRAPS); void assumption_NoFinalizableSubclass(Handle assumption); void assumption_ConcreteSubtype(Handle assumption); @@ -211,19 +211,19 @@ void assumption_ConcreteMethod(Handle assumption); void assumption_CallSiteTargetValue(Handle assumption); - void site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site); - void site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site); - void site_Call(CodeBuffer& buffer, jint pc_offset, oop site); - void site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site); - void site_Mark(CodeBuffer& buffer, jint pc_offset, oop site); + void site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS); + void site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS); + void site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS); + void site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS); + void 
site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS); - OopMap* create_oop_map(oop debug_info); + OopMap* create_oop_map(Handle debug_info, TRAPS); - void record_scope(jint pc_offset, oop debug_info); - void record_scope(jint pc_offset, oop code_pos, GrowableArray* objects); - void record_object_value(ObjectValue* sv, oop value, GrowableArray* objects); + void record_scope(jint pc_offset, Handle debug_info, TRAPS); + void record_scope(jint pc_offset, Handle code_pos, GrowableArray* objects, TRAPS); + void record_object_value(ObjectValue* sv, Handle value, GrowableArray* objects, TRAPS); - GrowableArray* record_virtual_objects(oop debug_info); + GrowableArray* record_virtual_objects(Handle debug_info, TRAPS); void process_exception_handlers(); int estimateStubSpace(int static_call_stubs); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciCompiler.cpp --- a/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -112,7 +112,7 @@ _bootstrapping = false; } -void JVMCICompiler::compile_method(methodHandle method, int entry_bci, JVMCIEnv* env) { +void JVMCICompiler::compile_method(const methodHandle& method, int entry_bci, JVMCIEnv* env) { JVMCI_EXCEPTION_CONTEXT bool is_osr = entry_bci != InvocationEntryBci; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciCompiler.hpp --- a/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -71,7 +71,7 @@ // Compilation entry point for methods virtual void compile_method(ciEnv* env, ciMethod* target, int entry_bci, DirectiveSet* directive); - void compile_method(methodHandle target, int entry_bci, JVMCIEnv* env); + void compile_method(const methodHandle& target, int entry_bci, JVMCIEnv* env); virtual bool is_trivial(Method* method); diff -r 6efbc7ffd767 -r d25c2fc1e248 
hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp --- a/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -670,7 +670,7 @@ TraceTime install_time("installCode", JVMCICompiler::codeInstallTimer()); CodeInstaller installer; - JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle); + JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle, CHECK_0); if (PrintCodeCacheOnCompilation) { stringStream s; @@ -690,6 +690,7 @@ assert(installed_code_handle->is_a(InstalledCode::klass()), "wrong type"); CompilerToVM::invalidate_installed_code(installed_code_handle, CHECK_0); InstalledCode::set_address(installed_code_handle, (jlong) cb); + InstalledCode::set_version(installed_code_handle, InstalledCode::version(installed_code_handle) + 1); if (cb->is_nmethod()) { InstalledCode::set_entryPoint(installed_code_handle, (jlong) cb->as_nmethod_or_null()->verified_entry_point()); } else { @@ -726,7 +727,7 @@ CodeBlob *cb = NULL; CodeInstaller installer; - JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer); + JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata, CHECK_0); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer); if (result != JVMCIEnv::ok) { return result; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciCompilerToVM.hpp diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciEnv.cpp --- a/hotspot/src/share/vm/jvmci/jvmciEnv.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciEnv.cpp Tue Nov 24 10:30:23 
2015 +0100 @@ -161,7 +161,7 @@ } // ------------------------------------------------------------------ -KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle& accessing_klass, +KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle accessing_klass, Symbol* klass_name, bool require_local) { ResourceMark rm; @@ -177,7 +177,7 @@ KlassHandle JVMCIEnv::get_klass_by_index_impl(const constantPoolHandle& cpool, int index, bool& is_accessible, - KlassHandle& accessor) { + KlassHandle accessor) { JVMCI_EXCEPTION_CONTEXT; KlassHandle klass (THREAD, ConstantPool::klass_at_if_loaded(cpool, index)); Symbol* klass_name = NULL; @@ -218,7 +218,7 @@ KlassHandle JVMCIEnv::get_klass_by_index(const constantPoolHandle& cpool, int index, bool& is_accessible, - KlassHandle& accessor) { + KlassHandle accessor) { ResourceMark rm; KlassHandle result = get_klass_by_index_impl(cpool, index, is_accessible, accessor); return result; @@ -229,7 +229,7 @@ // // Implementation note: the results of field lookups are cached // in the accessor klass. -void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle& klass, fieldDescriptor& field_desc, +void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle klass, fieldDescriptor& field_desc, int index) { JVMCI_EXCEPTION_CONTEXT; @@ -270,7 +270,7 @@ // ------------------------------------------------------------------ // Get a field by index from a klass's constant pool. -void JVMCIEnv::get_field_by_index(instanceKlassHandle& accessor, fieldDescriptor& fd, int index) { +void JVMCIEnv::get_field_by_index(instanceKlassHandle accessor, fieldDescriptor& fd, int index) { ResourceMark rm; return get_field_by_index_impl(accessor, fd, index); } @@ -278,8 +278,8 @@ // ------------------------------------------------------------------ // Perform an appropriate method lookup based on accessor, holder, // name, signature, and bytecode. 
-methodHandle JVMCIEnv::lookup_method(instanceKlassHandle& h_accessor, - instanceKlassHandle& h_holder, +methodHandle JVMCIEnv::lookup_method(instanceKlassHandle h_accessor, + instanceKlassHandle h_holder, Symbol* name, Symbol* sig, Bytecodes::Code bc) { @@ -314,7 +314,7 @@ // ------------------------------------------------------------------ methodHandle JVMCIEnv::get_method_by_index_impl(const constantPoolHandle& cpool, int index, Bytecodes::Code bc, - instanceKlassHandle& accessor) { + instanceKlassHandle accessor) { if (bc == Bytecodes::_invokedynamic) { ConstantPoolCacheEntry* cpce = cpool->invokedynamic_cp_cache_entry_at(index); bool is_resolved = !cpce->is_f1_null(); @@ -379,7 +379,7 @@ } // ------------------------------------------------------------------ -instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle& method_holder) { +instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle method_holder) { // For the case of .clone(), the method holder can be an ArrayKlass* // instead of an InstanceKlass*. For that case simply pretend that the // declared holder is Object.clone since that's where the call will bottom out. 
@@ -397,7 +397,7 @@ // ------------------------------------------------------------------ methodHandle JVMCIEnv::get_method_by_index(const constantPoolHandle& cpool, int index, Bytecodes::Code bc, - instanceKlassHandle& accessor) { + instanceKlassHandle accessor) { ResourceMark rm; return get_method_by_index_impl(cpool, index, bc, accessor); } @@ -452,7 +452,7 @@ // ------------------------------------------------------------------ JVMCIEnv::CodeInstallResult JVMCIEnv::register_method( - methodHandle& method, + const methodHandle& method, nmethod*& nm, int entry_bci, CodeOffsets* offsets, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciEnv.hpp --- a/hotspot/src/share/vm/jvmci/jvmciEnv.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciEnv.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -78,7 +78,7 @@ // The CI treats a klass as loaded if it is consistently defined in // another loader, even if it hasn't yet been loaded in all loaders // that could potentially see it via delegation. 
- static KlassHandle get_klass_by_name(KlassHandle& accessing_klass, + static KlassHandle get_klass_by_name(KlassHandle accessing_klass, Symbol* klass_name, bool require_local); @@ -86,12 +86,12 @@ static KlassHandle get_klass_by_index(const constantPoolHandle& cpool, int klass_index, bool& is_accessible, - KlassHandle& loading_klass); - static void get_field_by_index(instanceKlassHandle& loading_klass, fieldDescriptor& fd, + KlassHandle loading_klass); + static void get_field_by_index(instanceKlassHandle loading_klass, fieldDescriptor& fd, int field_index); static methodHandle get_method_by_index(const constantPoolHandle& cpool, int method_index, Bytecodes::Code bc, - instanceKlassHandle& loading_klass); + instanceKlassHandle loading_klass); JVMCIEnv(CompileTask* task, int system_dictionary_modification_counter); @@ -112,17 +112,17 @@ static KlassHandle get_klass_by_index_impl(const constantPoolHandle& cpool, int klass_index, bool& is_accessible, - KlassHandle& loading_klass); - static void get_field_by_index_impl(instanceKlassHandle& loading_klass, fieldDescriptor& fd, + KlassHandle loading_klass); + static void get_field_by_index_impl(instanceKlassHandle loading_klass, fieldDescriptor& fd, int field_index); static methodHandle get_method_by_index_impl(const constantPoolHandle& cpool, int method_index, Bytecodes::Code bc, - instanceKlassHandle& loading_klass); + instanceKlassHandle loading_klass); // Helper methods static bool check_klass_accessibility(KlassHandle accessing_klass, KlassHandle resolved_klass); - static methodHandle lookup_method(instanceKlassHandle& accessor, - instanceKlassHandle& holder, + static methodHandle lookup_method(instanceKlassHandle accessor, + instanceKlassHandle holder, Symbol* name, Symbol* sig, Bytecodes::Code bc); @@ -142,7 +142,7 @@ // Register the result of a compilation. 
static JVMCIEnv::CodeInstallResult register_method( - methodHandle& target, + const methodHandle& target, nmethod*& nm, int entry_bci, CodeOffsets* offsets, @@ -166,7 +166,7 @@ // InstanceKlass*. This is needed since the holder of a method in // the bytecodes could be an array type. Basically this converts // array types into java/lang/Object and other types stay as they are. - static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle& klass); + static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle klass); }; #endif // SHARE_VM_JVMCI_JVMCIENV_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciJavaClasses.cpp --- a/hotspot/src/share/vm/jvmci/jvmciJavaClasses.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciJavaClasses.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -30,7 +30,7 @@ // This function is similar to javaClasses.cpp, it computes the field offset of a (static or instance) field. // It looks up the name and signature symbols without creating new ones, all the symbols of these classes need to be already loaded. -void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field) { +void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field, TRAPS) { InstanceKlass* ik = InstanceKlass::cast(klass); Symbol* name_symbol = SymbolTable::probe(name, (int)strlen(name)); Symbol* signature_symbol = SymbolTable::probe(signature, (int)strlen(signature)); @@ -49,6 +49,11 @@ guarantee(fd.is_static() == static_field, "static/instance mismatch"); dest_offset = fd.offset(); assert(dest_offset != 0, "must be valid offset"); + if (static_field) { + // Must ensure classes for static fields are initialized as the + // accessor itself does not include a class initialization check. 
+ ik->initialize(CHECK); + } } // This piece of macro magic creates the contents of the jvmci_compute_offsets method that initializes the field indices of all the access classes. @@ -57,7 +62,7 @@ #define END_CLASS } -#define FIELD(klass, name, signature, static_field) compute_offset(klass::_##name##_offset, k, #name, signature, static_field); +#define FIELD(klass, name, signature, static_field) compute_offset(klass::_##name##_offset, k, #name, signature, static_field, CHECK); #define CHAR_FIELD(klass, name) FIELD(klass, name, "C", false) #define INT_FIELD(klass, name) FIELD(klass, name, "I", false) #define BOOLEAN_FIELD(klass, name) FIELD(klass, name, "Z", false) @@ -69,7 +74,7 @@ #define STATIC_BOOLEAN_FIELD(klass, name) FIELD(klass, name, "Z", true) -void JVMCIJavaClasses::compute_offsets() { +void JVMCIJavaClasses::compute_offsets(TRAPS) { COMPILER_CLASSES_DO(START_CLASS, END_CLASS, CHAR_FIELD, INT_FIELD, BOOLEAN_FIELD, LONG_FIELD, FLOAT_FIELD, OOP_FIELD, OOP_FIELD, OOP_FIELD, STATIC_OOP_FIELD, STATIC_OOP_FIELD, STATIC_INT_FIELD, STATIC_BOOLEAN_FIELD) } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp --- a/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -29,7 +29,7 @@ class JVMCIJavaClasses : AllStatic { public: - static void compute_offsets(); + static void compute_offsets(TRAPS); }; /* This macro defines the structure of the CompilationResult - classes. 
@@ -306,7 +306,7 @@ assert(obj->is_a(SystemDictionary::name##_klass()), "wrong class, " #name " expected, found %s", obj->klass()->external_name()); \ assert(offset != 0, "must be valid offset"); \ } \ - static void compute_offsets(); \ + static void compute_offsets(TRAPS); \ public: \ static InstanceKlass* klass() { return SystemDictionary::name##_klass(); } @@ -315,10 +315,10 @@ #define FIELD(name, type, accessor, cast) \ static int _##name##_offset; \ static type name(oop obj) { check(obj, #name, _##name##_offset); return cast obj->accessor(_##name##_offset); } \ - static type name(Handle& obj) { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); } \ + static type name(Handle obj) { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); } \ static type name(jobject obj) { check(JNIHandles::resolve(obj), #name, _##name##_offset); return cast JNIHandles::resolve(obj)->accessor(_##name##_offset); } \ static void set_##name(oop obj, type x) { check(obj, #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); } \ - static void set_##name(Handle& obj, type x) { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); } \ + static void set_##name(Handle obj, type x) { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); } \ static void set_##name(jobject obj, type x) { check(JNIHandles::resolve(obj), #name, _##name##_offset); JNIHandles::resolve(obj)->accessor##_put(_##name##_offset, x); } #define EMPTY_CAST @@ -392,6 +392,6 @@ #undef STATIC_BOOLEAN_FIELD #undef EMPTY_CAST -void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field); +void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field, TRAPS); #endif // SHARE_VM_JVMCI_JVMCIJAVACLASSES_HPP diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciRuntime.cpp --- 
a/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -59,7 +59,11 @@ static const char* OPTION_PREFIX = "jvmci.option."; static const size_t OPTION_PREFIX_LEN = strlen(OPTION_PREFIX); -BasicType JVMCIRuntime::kindToBasicType(jchar ch) { +BasicType JVMCIRuntime::kindToBasicType(Handle kind, TRAPS) { + if (kind.is_null()) { + THROW_(vmSymbols::java_lang_NullPointerException(), T_ILLEGAL); + } + jchar ch = JavaKind::typeChar(kind); switch(ch) { case 'z': return T_BOOLEAN; case 'b': return T_BYTE; @@ -72,10 +76,8 @@ case 'a': return T_OBJECT; case '-': return T_ILLEGAL; default: - fatal("unexpected Kind: %c", ch); - break; + JVMCI_ERROR_(T_ILLEGAL, "unexpected Kind: %c", ch); } - return T_ILLEGAL; } // Simple helper to see if the caller of a runtime stub which @@ -718,7 +720,7 @@ if (JVMCIRuntime::_well_known_classes_initialized == false) { SystemDictionary::WKID scan = SystemDictionary::FIRST_JVMCI_WKID; SystemDictionary::initialize_wk_klasses_through(SystemDictionary::LAST_JVMCI_WKID, scan, CHECK); - JVMCIJavaClasses::compute_offsets(); + JVMCIJavaClasses::compute_offsets(CHECK); JVMCIRuntime::_well_known_classes_initialized = true; } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/jvmciRuntime.hpp --- a/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -29,6 +29,17 @@ #include "runtime/arguments.hpp" #include "runtime/deoptimization.hpp" +#define JVMCI_ERROR(...) \ + { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return; } + +#define JVMCI_ERROR_(ret, ...) \ + { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return ret; } + +#define JVMCI_ERROR_0(...) JVMCI_ERROR_(0, __VA_ARGS__) +#define JVMCI_ERROR_NULL(...) 
JVMCI_ERROR_(NULL, __VA_ARGS__) +#define JVMCI_ERROR_OK(...) JVMCI_ERROR_(JVMCIEnv::ok, __VA_ARGS__) +#define CHECK_OK CHECK_(JVMCIEnv::ok) + class ParseClosure : public StackObj { int _lineNo; char* _filename; @@ -171,7 +182,7 @@ } \ (void)(0 - static BasicType kindToBasicType(jchar ch); + static BasicType kindToBasicType(Handle kind, TRAPS); // The following routines are all called from compiled JVMCI code diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp --- a/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -86,6 +86,7 @@ template(jdk_vm_ci_code_VirtualObject, "jdk/vm/ci/code/VirtualObject") \ template(jdk_vm_ci_code_RegisterSaveLayout, "jdk/vm/ci/code/RegisterSaveLayout") \ template(jdk_vm_ci_code_InvalidInstalledCodeException, "jdk/vm/ci/code/InvalidInstalledCodeException") \ + template(jdk_vm_ci_common_JVMCIError, "jdk/vm/ci/common/JVMCIError") \ template(compileMethod_name, "compileMethod") \ template(compileMethod_signature, "(Ljdk/vm/ci/hotspot/HotSpotResolvedJavaMethod;IJI)V") \ template(fromMetaspace_name, "fromMetaspace") \ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/memory/heap.cpp --- a/hotspot/src/share/vm/memory/heap.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/memory/heap.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -47,7 +47,10 @@ _freelist_segments = 0; _freelist_length = 0; _max_allocated_capacity = 0; - _was_full = false; + _blob_count = 0; + _nmethod_count = 0; + _adapter_count = 0; + _full_count = 0; } @@ -185,6 +188,7 @@ assert(!block->free(), "must be marked free"); DEBUG_ONLY(memset((void*)block->allocated_space(), badCodeHeapNewVal, instance_size)); _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity()); + _blob_count++; return block->allocated_space(); } @@ -198,6 +202,7 @@ _next_segment += number_of_segments; DEBUG_ONLY(memset((void 
*)b->allocated_space(), badCodeHeapNewVal, instance_size)); _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity()); + _blob_count++; return b->allocated_space(); } else { return NULL; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/memory/heap.hpp --- a/hotspot/src/share/vm/memory/heap.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/memory/heap.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -100,7 +100,11 @@ const char* _name; // Name of the CodeHeap const int _code_blob_type; // CodeBlobType it contains - bool _was_full; // True if the code heap was full + int _blob_count; // Number of CodeBlobs + int _nmethod_count; // Number of nmethods + int _adapter_count; // Number of adapters + int _full_count; // Number of times the code heap was full + enum { free_sentinel = 0xFF }; @@ -179,8 +183,13 @@ // Debugging / Profiling const char* name() const { return _name; } - bool was_full() { return _was_full; } - void report_full() { _was_full = true; } + int blob_count() { return _blob_count; } + int nmethod_count() { return _nmethod_count; } + void set_nmethod_count(int count) { _nmethod_count = count; } + int adapter_count() { return _adapter_count; } + void set_adapter_count(int count) { _adapter_count = count; } + int full_count() { return _full_count; } + void report_full() { _full_count++; } private: size_t heap_unallocated_capacity() const; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/oops/instanceKlass.cpp --- a/hotspot/src/share/vm/oops/instanceKlass.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/oops/instanceKlass.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -27,6 +27,7 @@ #include "classfile/systemDictionary.hpp" #include "classfile/verifier.hpp" #include "classfile/vmSymbols.hpp" +#include "code/dependencyContext.hpp" #include "compiler/compileBroker.hpp" #include "gc/shared/collectedHeap.inline.hpp" #include "gc/shared/specialized_oop_closures.hpp" @@ -203,7 +204,6 @@ int iksize = 
InstanceKlass::size(vtable_len, itable_len, nonstatic_oop_map_size, access_flags.is_interface(), is_anonymous); - set_vtable_length(vtable_len); set_itable_length(itable_len); set_static_field_size(static_field_size); @@ -232,7 +232,7 @@ set_static_oop_field_count(0); set_nonstatic_field_size(0); set_is_marked_dependent(false); - set_has_unloaded_dependent(false); + _dep_context = DependencyContext::EMPTY; set_init_state(InstanceKlass::allocated); set_init_thread(NULL); set_reference_type(rt); @@ -246,7 +246,6 @@ set_annotations(NULL); set_jvmti_cached_class_field_map(NULL); set_initial_method_idnum(0); - _dependencies = NULL; set_jvmti_cached_class_field_map(NULL); set_cached_class_file(NULL); set_initial_method_idnum(0); @@ -1854,200 +1853,30 @@ return id; } -int nmethodBucket::decrement() { - return Atomic::add(-1, (volatile int *)&_count); -} - -// -// Walk the list of dependent nmethods searching for nmethods which -// are dependent on the changes that were passed in and mark them for -// deoptimization. Returns the number of nmethods found. -// -int nmethodBucket::mark_dependent_nmethods(nmethodBucket* deps, DepChange& changes) { - assert_locked_or_safepoint(CodeCache_lock); - int found = 0; - for (nmethodBucket* b = deps; b != NULL; b = b->next()) { - nmethod* nm = b->get_nmethod(); - // since dependencies aren't removed until an nmethod becomes a zombie, - // the dependency list may contain nmethods which aren't alive. - if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) { - if (TraceDependencies) { - ResourceMark rm; - tty->print_cr("Marked for deoptimization"); - changes.print(); - nm->print(); - nm->print_dependencies(); - } - nm->mark_for_deoptimization(); - found++; - } - } - return found; -} - -// -// Add an nmethodBucket to the list of dependencies for this nmethod. 
-// It's possible that an nmethod has multiple dependencies on this klass -// so a count is kept for each bucket to guarantee that creation and -// deletion of dependencies is consistent. Returns new head of the list. -// -nmethodBucket* nmethodBucket::add_dependent_nmethod(nmethodBucket* deps, nmethod* nm) { - assert_locked_or_safepoint(CodeCache_lock); - for (nmethodBucket* b = deps; b != NULL; b = b->next()) { - if (nm == b->get_nmethod()) { - b->increment(); - return deps; - } - } - return new nmethodBucket(nm, deps); +inline DependencyContext InstanceKlass::dependencies() { + DependencyContext dep_context(&_dep_context); + return dep_context; } -// -// Decrement count of the nmethod in the dependency list and remove -// the bucket completely when the count goes to 0. This method must -// find a corresponding bucket otherwise there's a bug in the -// recording of dependencies. Returns true if the bucket was deleted, -// or marked ready for reclaimation. -bool nmethodBucket::remove_dependent_nmethod(nmethodBucket** deps, nmethod* nm, bool delete_immediately) { - assert_locked_or_safepoint(CodeCache_lock); - - nmethodBucket* first = *deps; - nmethodBucket* last = NULL; - - for (nmethodBucket* b = first; b != NULL; b = b->next()) { - if (nm == b->get_nmethod()) { - int val = b->decrement(); - guarantee(val >= 0, "Underflow: %d", val); - if (val == 0) { - if (delete_immediately) { - if (last == NULL) { - *deps = b->next(); - } else { - last->set_next(b->next()); - } - delete b; - } - } - return true; - } - last = b; - } - -#ifdef ASSERT - tty->print_raw_cr("### can't find dependent nmethod"); - nm->print(); -#endif // ASSERT - ShouldNotReachHere(); - return false; -} - -// Convenience overload, for callers that don't want to delete the nmethodBucket entry. 
-bool nmethodBucket::remove_dependent_nmethod(nmethodBucket* deps, nmethod* nm) { - nmethodBucket** deps_addr = &deps; - return remove_dependent_nmethod(deps_addr, nm, false /* Don't delete */); -} - -// -// Reclaim all unused buckets. Returns new head of the list. -// -nmethodBucket* nmethodBucket::clean_dependent_nmethods(nmethodBucket* deps) { - nmethodBucket* first = deps; - nmethodBucket* last = NULL; - nmethodBucket* b = first; - - while (b != NULL) { - assert(b->count() >= 0, "bucket count: %d", b->count()); - nmethodBucket* next = b->next(); - if (b->count() == 0) { - if (last == NULL) { - first = next; - } else { - last->set_next(next); - } - delete b; - // last stays the same. - } else { - last = b; - } - b = next; - } - return first; -} - -#ifndef PRODUCT -void nmethodBucket::print_dependent_nmethods(nmethodBucket* deps, bool verbose) { - int idx = 0; - for (nmethodBucket* b = deps; b != NULL; b = b->next()) { - nmethod* nm = b->get_nmethod(); - tty->print("[%d] count=%d { ", idx++, b->count()); - if (!verbose) { - nm->print_on(tty, "nmethod"); - tty->print_cr(" } "); - } else { - nm->print(); - nm->print_dependencies(); - tty->print_cr("--- } "); - } - } -} - -bool nmethodBucket::is_dependent_nmethod(nmethodBucket* deps, nmethod* nm) { - for (nmethodBucket* b = deps; b != NULL; b = b->next()) { - if (nm == b->get_nmethod()) { -#ifdef ASSERT - int count = b->count(); - assert(count >= 0, "count shouldn't be negative: %d", count); -#endif - return true; - } - } - return false; -} -#endif //PRODUCT - int InstanceKlass::mark_dependent_nmethods(DepChange& changes) { - assert_locked_or_safepoint(CodeCache_lock); - return nmethodBucket::mark_dependent_nmethods(_dependencies, changes); -} - -void InstanceKlass::clean_dependent_nmethods() { - assert_locked_or_safepoint(CodeCache_lock); - - if (has_unloaded_dependent()) { - _dependencies = nmethodBucket::clean_dependent_nmethods(_dependencies); - set_has_unloaded_dependent(false); - } -#ifdef ASSERT - else { - // 
Verification - for (nmethodBucket* b = _dependencies; b != NULL; b = b->next()) { - assert(b->count() >= 0, "bucket count: %d", b->count()); - assert(b->count() != 0, "empty buckets need to be cleaned"); - } - } -#endif + return dependencies().mark_dependent_nmethods(changes); } void InstanceKlass::add_dependent_nmethod(nmethod* nm) { - assert_locked_or_safepoint(CodeCache_lock); - _dependencies = nmethodBucket::add_dependent_nmethod(_dependencies, nm); + dependencies().add_dependent_nmethod(nm); } void InstanceKlass::remove_dependent_nmethod(nmethod* nm, bool delete_immediately) { - assert_locked_or_safepoint(CodeCache_lock); - - if (nmethodBucket::remove_dependent_nmethod(&_dependencies, nm, delete_immediately)) { - set_has_unloaded_dependent(true); - } + dependencies().remove_dependent_nmethod(nm, delete_immediately); } #ifndef PRODUCT void InstanceKlass::print_dependent_nmethods(bool verbose) { - nmethodBucket::print_dependent_nmethods(_dependencies, verbose); + dependencies().print_dependent_nmethods(verbose); } bool InstanceKlass::is_dependent_nmethod(nmethod* nm) { - return nmethodBucket::is_dependent_nmethod(_dependencies, nm); + return dependencies().is_dependent_nmethod(nm); } #endif //PRODUCT @@ -2055,7 +1884,9 @@ clean_implementors_list(is_alive); clean_method_data(is_alive); - clean_dependent_nmethods(); + // Since GC iterates InstanceKlasses sequentially, it is safe to remove stale entries here. 
+ DependencyContext dep_context(&_dep_context); + dep_context.expunge_stale_entries(); } void InstanceKlass::clean_implementors_list(BoolObjectClosure* is_alive) { @@ -2102,6 +1933,8 @@ constants()->remove_unshareable_info(); + assert(_dep_context == DependencyContext::EMPTY, "dependency context is not shareable"); + for (int i = 0; i < methods()->length(); i++) { Method* m = methods()->at(i); m->remove_unshareable_info(); @@ -2231,12 +2064,10 @@ } // release dependencies - nmethodBucket* b = _dependencies; - _dependencies = NULL; - while (b != NULL) { - nmethodBucket* next = b->next(); - delete b; - b = next; + { + DependencyContext ctx(&_dep_context); + int marked = ctx.remove_all_dependents(); + assert(marked == 0, "all dependencies should be already invalidated"); } // Deallocate breakpoint records @@ -3558,199 +3389,3 @@ unsigned char * InstanceKlass::get_cached_class_file_bytes() { return VM_RedefineClasses::get_cached_class_file_bytes(_cached_class_file); } - - -/////////////// Unit tests /////////////// - -#ifndef PRODUCT - -class TestNmethodBucketContext { - public: - nmethod* _nmethodLast; - nmethod* _nmethodMiddle; - nmethod* _nmethodFirst; - - nmethodBucket* _bucketLast; - nmethodBucket* _bucketMiddle; - nmethodBucket* _bucketFirst; - - nmethodBucket* _bucketList; - - TestNmethodBucketContext() { - CodeCache_lock->lock_without_safepoint_check(); - - _nmethodLast = reinterpret_cast(0x8 * 0); - _nmethodMiddle = reinterpret_cast(0x8 * 1); - _nmethodFirst = reinterpret_cast(0x8 * 2); - - _bucketLast = new nmethodBucket(_nmethodLast, NULL); - _bucketMiddle = new nmethodBucket(_nmethodMiddle, _bucketLast); - _bucketFirst = new nmethodBucket(_nmethodFirst, _bucketMiddle); - - _bucketList = _bucketFirst; - } - - ~TestNmethodBucketContext() { - delete _bucketLast; - delete _bucketMiddle; - delete _bucketFirst; - - CodeCache_lock->unlock(); - } -}; - -class TestNmethodBucket { - public: - static void testRemoveDependentNmethodFirstDeleteImmediately() { - 
TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodFirst, true /* delete */); - - assert(c._bucketList == c._bucketMiddle, "check"); - assert(c._bucketList->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next() == NULL, "check"); - - // Cleanup before context is deleted. - c._bucketFirst = NULL; - } - - static void testRemoveDependentNmethodMiddleDeleteImmediately() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodMiddle, true /* delete */); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next() == NULL, "check"); - - // Cleanup before context is deleted. - c._bucketMiddle = NULL; - } - - static void testRemoveDependentNmethodLastDeleteImmediately() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodLast, true /* delete */); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == NULL, "check"); - - // Cleanup before context is deleted. 
- c._bucketLast = NULL; - } - - static void testRemoveDependentNmethodFirstDeleteDeferred() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodFirst, false /* delete */); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 0, "check"); - assert(c._bucketMiddle->count() == 1, "check"); - assert(c._bucketLast->count() == 1, "check"); - } - - static void testRemoveDependentNmethodMiddleDeleteDeferred() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodMiddle, false /* delete */); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 1, "check"); - assert(c._bucketMiddle->count() == 0, "check"); - assert(c._bucketLast->count() == 1, "check"); - } - - static void testRemoveDependentNmethodLastDeleteDeferred() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodLast, false /* delete */); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 1, "check"); - assert(c._bucketMiddle->count() == 1, "check"); - assert(c._bucketLast->count() == 0, "check"); - } - - static void testRemoveDependentNmethodConvenienceFirst() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodFirst); - - 
assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 0, "check"); - assert(c._bucketMiddle->count() == 1, "check"); - assert(c._bucketLast->count() == 1, "check"); - } - - static void testRemoveDependentNmethodConvenienceMiddle() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodMiddle); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 1, "check"); - assert(c._bucketMiddle->count() == 0, "check"); - assert(c._bucketLast->count() == 1, "check"); - } - - static void testRemoveDependentNmethodConvenienceLast() { - TestNmethodBucketContext c; - - nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodLast); - - assert(c._bucketList == c._bucketFirst, "check"); - assert(c._bucketList->next() == c._bucketMiddle, "check"); - assert(c._bucketList->next()->next() == c._bucketLast, "check"); - assert(c._bucketList->next()->next()->next() == NULL, "check"); - - assert(c._bucketFirst->count() == 1, "check"); - assert(c._bucketMiddle->count() == 1, "check"); - assert(c._bucketLast->count() == 0, "check"); - } - - static void testRemoveDependentNmethod() { - testRemoveDependentNmethodFirstDeleteImmediately(); - testRemoveDependentNmethodMiddleDeleteImmediately(); - testRemoveDependentNmethodLastDeleteImmediately(); - - testRemoveDependentNmethodFirstDeleteDeferred(); - testRemoveDependentNmethodMiddleDeleteDeferred(); - testRemoveDependentNmethodLastDeleteDeferred(); - - testRemoveDependentNmethodConvenienceFirst(); - 
testRemoveDependentNmethodConvenienceMiddle(); - testRemoveDependentNmethodConvenienceLast(); - } - - static void test() { - testRemoveDependentNmethod(); - } -}; - -void TestNmethodBucket_test() { - TestNmethodBucket::test(); -} - -#endif diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/oops/instanceKlass.hpp --- a/hotspot/src/share/vm/oops/instanceKlass.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/oops/instanceKlass.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -53,15 +53,15 @@ // forward declaration for class -- see below for definition -class SuperTypeClosure; -class JNIid; +class BreakpointInfo; +class DepChange; +class DependencyContext; +class fieldDescriptor; class jniIdMapBase; -class BreakpointInfo; -class fieldDescriptor; -class DepChange; -class nmethodBucket; +class JNIid; class JvmtiCachedClassFieldMap; class MemberNameTable; +class SuperTypeClosure; // This is used in iterators below. class FieldClosure: public StackObj { @@ -198,7 +198,6 @@ // _is_marked_dependent can be set concurrently, thus cannot be part of the // _misc_flags. bool _is_marked_dependent; // used for marking during flushing and deoptimization - bool _has_unloaded_dependent; // The low two bits of _misc_flags contains the kind field. 
// This can be used to quickly discriminate among the four kinds of @@ -235,7 +234,7 @@ MemberNameTable* _member_names; // Member names JNIid* _jni_ids; // First JNI identifier for static fields in this class jmethodID* _methods_jmethod_ids; // jmethodIDs corresponding to method_idnum, or NULL if none - nmethodBucket* _dependencies; // list of dependent nmethods + intptr_t _dep_context; // packed DependencyContext structure nmethod* _osr_nmethods_head; // Head of list of on-stack replacement nmethods for this class BreakpointInfo* _breakpoints; // bpt lists, managed by Method* // Linked instanceKlasses of previous versions @@ -468,9 +467,6 @@ bool is_marked_dependent() const { return _is_marked_dependent; } void set_is_marked_dependent(bool value) { _is_marked_dependent = value; } - bool has_unloaded_dependent() const { return _has_unloaded_dependent; } - void set_has_unloaded_dependent(bool value) { _has_unloaded_dependent = value; } - // initialization (virtuals from Klass) bool should_be_initialized() const; // means that initialize should be called void initialize(TRAPS); @@ -835,7 +831,8 @@ JNIid* jni_id_for(int offset); // maintenance of deoptimization dependencies - int mark_dependent_nmethods(DepChange& changes); + inline DependencyContext dependencies(); + int mark_dependent_nmethods(DepChange& changes); void add_dependent_nmethod(nmethod* nm); void remove_dependent_nmethod(nmethod* nm, bool delete_immediately); @@ -1027,7 +1024,6 @@ void clean_weak_instanceklass_links(BoolObjectClosure* is_alive); void clean_implementors_list(BoolObjectClosure* is_alive); void clean_method_data(BoolObjectClosure* is_alive); - void clean_dependent_nmethods(); // Explicit metaspace deallocation of fields // For RedefineClasses and class file parsing errors, we need to deallocate @@ -1320,48 +1316,6 @@ void verify(Klass* holder); }; - -// -// nmethodBucket is used to record dependent nmethods for -// deoptimization. 
nmethod dependencies are actually -// pairs but we really only care about the klass part for purposes of -// finding nmethods which might need to be deoptimized. Instead of -// recording the method, a count of how many times a particular nmethod -// was recorded is kept. This ensures that any recording errors are -// noticed since an nmethod should be removed as many times are it's -// added. -// -class nmethodBucket: public CHeapObj { - friend class VMStructs; - private: - nmethod* _nmethod; - int _count; - nmethodBucket* _next; - - public: - nmethodBucket(nmethod* nmethod, nmethodBucket* next) { - _nmethod = nmethod; - _next = next; - _count = 1; - } - int count() { return _count; } - int increment() { _count += 1; return _count; } - int decrement(); - nmethodBucket* next() { return _next; } - void set_next(nmethodBucket* b) { _next = b; } - nmethod* get_nmethod() { return _nmethod; } - - static int mark_dependent_nmethods(nmethodBucket* deps, DepChange& changes); - static nmethodBucket* add_dependent_nmethod(nmethodBucket* deps, nmethod* nm); - static bool remove_dependent_nmethod(nmethodBucket** deps, nmethod* nm, bool delete_immediately); - static bool remove_dependent_nmethod(nmethodBucket* deps, nmethod* nm); - static nmethodBucket* clean_dependent_nmethods(nmethodBucket* deps); -#ifndef PRODUCT - static void print_dependent_nmethods(nmethodBucket* deps, bool verbose); - static bool is_dependent_nmethod(nmethodBucket* deps, nmethod* nm); -#endif //PRODUCT -}; - // An iterator that's used to access the inner classes indices in the // InstanceKlass::_inner_classes array. 
class InnerClassesIterator : public StackObj { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/oops/method.cpp --- a/hotspot/src/share/vm/oops/method.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/oops/method.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -579,12 +579,45 @@ } bool Method::is_accessor() const { + return is_getter() || is_setter(); +} + +bool Method::is_getter() const { if (code_size() != 5) return false; if (size_of_parameters() != 1) return false; - if (java_code_at(0) != Bytecodes::_aload_0 ) return false; + if (java_code_at(0) != Bytecodes::_aload_0) return false; if (java_code_at(1) != Bytecodes::_getfield) return false; - if (java_code_at(4) != Bytecodes::_areturn && - java_code_at(4) != Bytecodes::_ireturn ) return false; + switch (java_code_at(4)) { + case Bytecodes::_ireturn: + case Bytecodes::_lreturn: + case Bytecodes::_freturn: + case Bytecodes::_dreturn: + case Bytecodes::_areturn: + break; + default: + return false; + } + return true; +} + +bool Method::is_setter() const { + if (code_size() != 6) return false; + if (java_code_at(0) != Bytecodes::_aload_0) return false; + switch (java_code_at(1)) { + case Bytecodes::_iload_1: + case Bytecodes::_aload_1: + case Bytecodes::_fload_1: + if (size_of_parameters() != 2) return false; + break; + case Bytecodes::_dload_1: + case Bytecodes::_lload_1: + if (size_of_parameters() != 3) return false; + break; + default: + return false; + } + if (java_code_at(2) != Bytecodes::_putfield) return false; + if (java_code_at(5) != Bytecodes::_return) return false; return true; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/oops/method.hpp --- a/hotspot/src/share/vm/oops/method.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/oops/method.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -595,6 +595,12 @@ // returns true if the method is an accessor function (setter/getter). 
bool is_accessor() const; + // returns true if the method is a getter + bool is_getter() const; + + // returns true if the method is a setter + bool is_setter() const; + // returns true if the method does nothing but return a constant of primitive type bool is_constant_getter() const; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/buildOopMap.cpp --- a/hotspot/src/share/vm/opto/buildOopMap.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/buildOopMap.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -542,10 +542,11 @@ if (i == cfg->number_of_blocks()) { break; // Got 'em all } -#ifndef PRODUCT - if( PrintOpto && Verbose ) + + if (PrintOpto && Verbose) { tty->print_cr("retripping live calc"); -#endif + } + // Force the issue (expensively): recheck everybody for (i = 1; i < cfg->number_of_blocks(); i++) { worklist->push(cfg->get_block(i)); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/c2_globals.hpp --- a/hotspot/src/share/vm/opto/c2_globals.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/c2_globals.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -186,9 +186,9 @@ "Maximum number of unrolls for main loop") \ range(0, max_jint) \ \ - product(bool, SuperWordLoopUnrollAnalysis, false, \ - "Map number of unrolls for main loop via " \ - "Superword Level Parallelism analysis") \ + product_pd(bool, SuperWordLoopUnrollAnalysis, \ + "Map number of unrolls for main loop via " \ + "Superword Level Parallelism analysis") \ \ notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \ "Trace what Superword Level Parallelism analysis applies") \ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/c2compiler.cpp --- a/hotspot/src/share/vm/opto/c2compiler.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/c2compiler.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -451,6 +451,7 @@ case vmIntrinsics::_updateByteBufferAdler32: case vmIntrinsics::_profileBoolean: case vmIntrinsics::_isCompileConstant: + case 
vmIntrinsics::_Objects_checkIndex: break; default: return false; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/callnode.cpp --- a/hotspot/src/share/vm/opto/callnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/callnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -778,7 +778,7 @@ } if (is_CallJava() && as_CallJava()->method() != NULL) { ciMethod* meth = as_CallJava()->method(); - if (meth->is_accessor()) { + if (meth->is_getter()) { return false; } // May modify (by reflection) if an boxing object is passed diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/cfgnode.cpp --- a/hotspot/src/share/vm/opto/cfgnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/cfgnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -984,7 +984,7 @@ #ifdef ASSERT // The following logic has been moved into TypeOopPtr::filter. const Type* jt = t->join_speculative(_type); - if( jt->empty() ) { // Emptied out??? + if (jt->empty()) { // Emptied out??? // Check for evil case of 't' being a class and '_type' expecting an // interface. This can happen because the bytecodes do not contain @@ -995,14 +995,21 @@ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows // into a Phi which "knows" it's an Interface type we'll have to // uplift the type. - if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() ) - { assert(ft == _type, ""); } // Uplift to interface - else if( !t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface() ) - { assert(ft == _type, ""); } // Uplift to interface - // Otherwise it's something stupid like non-overlapping int ranges - // found on dying counted loops. 
- else - { assert(ft == Type::TOP, ""); } // Canonical empty value + if (!t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface()) { + assert(ft == _type, ""); // Uplift to interface + } else if (!t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface()) { + assert(ft == _type, ""); // Uplift to interface + } else { + // We also have to handle 'evil cases' of interface- vs. class-arrays + Type::get_arrays_base_elements(jt, _type, NULL, &ttip); + if (!t->empty() && ttip != NULL && ttip->is_loaded() && ttip->klass()->is_interface()) { + assert(ft == _type, ""); // Uplift to array of interface + } else { + // Otherwise it's something stupid like non-overlapping int ranges + // found on dying counted loops. + assert(ft == Type::TOP, ""); // Canonical empty value + } + } } else { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/cfgnode.hpp --- a/hotspot/src/share/vm/opto/cfgnode.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/cfgnode.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -270,7 +270,6 @@ virtual uint size_of() const { return sizeof(*this); } private: - ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r); ProjNode* range_check_trap_proj() { int flip_test = 0; Node* l = NULL; @@ -283,7 +282,7 @@ bool is_ctrl_folds(Node* ctrl, PhaseIterGVN* igvn); bool has_shared_region(ProjNode* proj, ProjNode*& success, ProjNode*& fail); bool has_only_uncommon_traps(ProjNode* proj, ProjNode*& success, ProjNode*& fail, PhaseIterGVN* igvn); - static void merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn); + Node* merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn); static void improve_address_types(Node* l, Node* r, ProjNode* fail, PhaseIterGVN* igvn); bool is_cmp_with_loadrange(ProjNode* proj); bool is_null_check(ProjNode* proj, PhaseIterGVN* igvn); @@ -292,6 +291,12 @@ ProjNode* uncommon_trap_proj(CallStaticJavaNode*& call) const; 
bool fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn); +protected: + ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r); + Node* Ideal_common(PhaseGVN *phase, bool can_reshape); + Node* dominated_by(Node* prev_dom, PhaseIterGVN* igvn); + Node* search_identical(int dist); + public: // Degrees of branch prediction probability by order of magnitude: @@ -375,8 +380,6 @@ virtual const Type *Value( PhaseTransform *phase ) const; virtual int required_outcnt() const { return 2; } virtual const RegMask &out_RegMask() const; - void dominated_by(Node* prev_dom, PhaseIterGVN* igvn); - int is_range_check(Node* &range, Node* &index, jint &offset); Node* fold_compares(PhaseIterGVN* phase); static Node* up_one_dom(Node* curr, bool linear_only = false); @@ -391,6 +394,20 @@ #endif }; +class RangeCheckNode : public IfNode { +private: + int is_range_check(Node* &range, Node* &index, jint &offset); + +public: + RangeCheckNode(Node* control, Node *b, float p, float fcnt) + : IfNode(control, b, p, fcnt) { + init_class_id(Class_RangeCheck); + } + + virtual int Opcode() const; + virtual Node* Ideal(PhaseGVN *phase, bool can_reshape); +}; + class IfProjNode : public CProjNode { public: IfProjNode(IfNode *ifnode, uint idx) : CProjNode(ifnode,idx) {} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/classes.hpp --- a/hotspot/src/share/vm/opto/classes.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/classes.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -138,6 +138,7 @@ macro(Halt) macro(HasNegatives) macro(If) +macro(RangeCheck) macro(IfFalse) macro(IfTrue) macro(Initialize) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/compile.cpp --- a/hotspot/src/share/vm/opto/compile.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/compile.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -707,7 +707,7 @@ _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level()); } 
#endif - set_print_inlining(directive->PrintInliningOption NOT_PRODUCT( || PrintOptoInlining)); + set_print_inlining(directive->PrintInliningOption || PrintOptoInlining); set_print_intrinsics(directive->PrintIntrinsicsOption); set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it @@ -3181,6 +3181,13 @@ n->set_req(MemBarNode::Precedent, top()); } break; + case Op_RangeCheck: { + RangeCheckNode* rc = n->as_RangeCheck(); + Node* iff = new IfNode(rc->in(0), rc->in(1), rc->_prob, rc->_fcnt); + n->subsume_by(iff, this); + frc._tests.push(iff); + break; + } default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); @@ -3189,8 +3196,9 @@ } // Collect CFG split points - if (n->is_MultiBranch()) + if (n->is_MultiBranch() && !n->is_RangeCheck()) { frc._tests.push(n); + } } //------------------------------final_graph_reshaping_walk--------------------- diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/doCall.cpp --- a/hotspot/src/share/vm/opto/doCall.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/doCall.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -45,7 +45,7 @@ if (TraceTypeProfile || C->print_inlining()) { outputStream* out = tty; if (!C->print_inlining()) { - if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) { + if (!PrintOpto && !PrintCompilation) { method->print_short_name(); tty->cr(); } @@ -426,12 +426,10 @@ // uncommon-trap when callee is unloaded, uninitialized or will not link // bailout when too many arguments for register representation if (!will_link || can_not_compile_call_site(orig_callee, klass)) { -#ifndef PRODUCT if (PrintOpto && (Verbose || WizardMode)) { method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci()); orig_callee->print_name(); tty->cr(); } -#endif return; } assert(holder_klass->is_loaded(), ""); @@ -634,12 +632,10 @@ // If the return type of the method is not loaded, assert that the // value we got is a null. Otherwise, we need to recompile. 
if (!rtype->is_loaded()) { -#ifndef PRODUCT if (PrintOpto && (Verbose || WizardMode)) { method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci()); cg->method()->print_name(); tty->cr(); } -#endif if (C->log() != NULL) { C->log()->elem("assert_null reason='return' klass='%d'", C->log()->identify(rtype)); @@ -851,11 +847,9 @@ if (remaining == 1) { push_ex_oop(ex_node); // Push exception oop for handler -#ifndef PRODUCT if (PrintOpto && WizardMode) { tty->print_cr(" Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci); } -#endif merge_exception(handler_bci); // jump to handler return; // No more handling to be done here! } @@ -882,13 +876,11 @@ assert(klass->has_subklass() || tinst->klass_is_exact(), "lost exactness"); Node* ex_oop = _gvn.transform(new CheckCastPPNode(control(), ex_node, tinst)); push_ex_oop(ex_oop); // Push exception oop for handler -#ifndef PRODUCT if (PrintOpto && WizardMode) { tty->print(" Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci); klass->print_name(); tty->cr(); } -#endif merge_exception(handler_bci); } set_control(not_subtype_ctrl); @@ -1067,13 +1059,11 @@ // such method can be changed when its class is redefined. 
ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver); if (exact_method != NULL) { -#ifndef PRODUCT if (PrintOpto) { tty->print(" Calling method via exact type @%d --- ", bci); exact_method->print_name(); tty->cr(); } -#endif return exact_method; } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/graphKit.cpp --- a/hotspot/src/share/vm/opto/graphKit.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/graphKit.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1457,18 +1457,22 @@ // factory methods in "int adr_idx" Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency, bool require_atomic_access) { + MemNode::MemOrd mo, + LoadNode::ControlDependency control_dependency, + bool require_atomic_access, + bool unaligned, + bool mismatched) { assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); const TypePtr* adr_type = NULL; // debug-mode-only argument debug_only(adr_type = C->get_adr_type(adr_idx)); Node* mem = memory(adr_idx); Node* ld; if (require_atomic_access && bt == T_LONG) { - ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency); + ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched); } else if (require_atomic_access && bt == T_DOUBLE) { - ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency); + ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched); } else { - ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency); + ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched); } ld = _gvn.transform(ld); if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) { @@ -1481,7 +1485,9 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt, 
int adr_idx, MemNode::MemOrd mo, - bool require_atomic_access) { + bool require_atomic_access, + bool unaligned, + bool mismatched) { assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); const TypePtr* adr_type = NULL; debug_only(adr_type = C->get_adr_type(adr_idx)); @@ -1494,6 +1500,12 @@ } else { st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo); } + if (unaligned) { + st->as_Store()->set_unaligned_access(); + } + if (mismatched) { + st->as_Store()->set_mismatched_access(); + } st = _gvn.transform(st); set_memory(st, adr_idx); // Back-to-back stores can only remove intermediate store with DU info @@ -1587,7 +1599,8 @@ const TypeOopPtr* val_type, BasicType bt, bool use_precise, - MemNode::MemOrd mo) { + MemNode::MemOrd mo, + bool mismatched) { // Transformation of a value which could be NULL pointer (CastPP #NULL) // could be delayed during Parse (for example, in adjust_map_after_if()). // Execute transformation here to avoid barrier generation in such case. 
@@ -1607,7 +1620,7 @@ NULL /* pre_val */, bt); - Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo); + Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, false /* require_atomic_access */, false /* unaligned */, mismatched); post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise); return store; } @@ -1619,7 +1632,8 @@ const TypePtr* adr_type, Node* val, BasicType bt, - MemNode::MemOrd mo) { + MemNode::MemOrd mo, + bool mismatched) { Compile::AliasType* at = C->alias_type(adr_type); const TypeOopPtr* val_type = NULL; if (adr_type->isa_instptr()) { @@ -1638,7 +1652,7 @@ if (val_type == NULL) { val_type = TypeInstPtr::BOTTOM; } - return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo); + return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched); } @@ -4373,7 +4387,8 @@ set_memory(mem, TypeAryPtr::BYTES); Node* ch = load_array_element(control(), src, i_byte, TypeAryPtr::BYTES); Node* st = store_to_memory(control(), array_element_address(dst, i_char, T_BYTE), - AndI(ch, intcon(0xff)), T_CHAR, TypeAryPtr::BYTES, MemNode::unordered); + AndI(ch, intcon(0xff)), T_CHAR, TypeAryPtr::BYTES, MemNode::unordered, + false, false, true /* mismatched */); IfNode* iff = create_and_map_if(head, Bool(CmpI(i_byte, count), BoolTest::lt), PROB_FAIR, COUNT_UNKNOWN); head->init_req(2, IfTrue(iff)); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/graphKit.hpp --- a/hotspot/src/share/vm/opto/graphKit.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/graphKit.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -513,23 +513,28 @@ // of volatile fields.
Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false) { + bool require_atomic_access = false, bool unaligned = false, + bool mismatched = false) { // This version computes alias_index from bottom_type return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(), - mo, control_dependency, require_atomic_access); + mo, control_dependency, require_atomic_access, + unaligned, mismatched); } Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false) { + bool require_atomic_access = false, bool unaligned = false, + bool mismatched = false) { // This version computes alias_index from an address type assert(adr_type != NULL, "use other make_load factory"); return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type), - mo, control_dependency, require_atomic_access); + mo, control_dependency, require_atomic_access, + unaligned, mismatched); } // This is the base version which is given an alias index. Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false); + bool require_atomic_access = false, bool unaligned = false, + bool mismatched = false); // Create & transform a StoreNode and store the effect into the // parser's memory state. 
@@ -542,19 +547,24 @@ Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt, const TypePtr* adr_type, MemNode::MemOrd mo, - bool require_atomic_access = false) { + bool require_atomic_access = false, + bool unaligned = false, + bool mismatched = false) { // This version computes alias_index from an address type assert(adr_type != NULL, "use other store_to_memory factory"); return store_to_memory(ctl, adr, val, bt, C->get_alias_index(adr_type), - mo, require_atomic_access); + mo, require_atomic_access, + unaligned, mismatched); } // This is the base version which is given alias index // Return the new StoreXNode Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt, int adr_idx, MemNode::MemOrd, - bool require_atomic_access = false); + bool require_atomic_access = false, + bool unaligned = false, + bool mismatched = false); // All in one pre-barrier, store, post_barrier @@ -577,7 +587,8 @@ const TypeOopPtr* val_type, BasicType bt, bool use_precise, - MemNode::MemOrd mo); + MemNode::MemOrd mo, + bool mismatched = false); Node* store_oop_to_object(Node* ctl, Node* obj, // containing obj @@ -608,7 +619,8 @@ const TypePtr* adr_type, Node* val, BasicType bt, - MemNode::MemOrd mo); + MemNode::MemOrd mo, + bool mismatched = false); // For the few case where the barriers need special help void pre_barrier(bool do_load, Node* ctl, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/idealKit.cpp --- a/hotspot/src/share/vm/opto/idealKit.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/idealKit.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -368,7 +368,8 @@ Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt, int adr_idx, - MemNode::MemOrd mo, bool require_atomic_access) { + MemNode::MemOrd mo, bool require_atomic_access, + bool mismatched) { assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory"); const TypePtr* adr_type = NULL; debug_only(adr_type = C->get_adr_type(adr_idx)); @@ -379,6 +380,9 
@@ } else { st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo); } + if (mismatched) { + st->as_Store()->set_mismatched_access(); + } st = transform(st); set_memory(st, adr_idx); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/idealKit.hpp --- a/hotspot/src/share/vm/opto/idealKit.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/idealKit.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -229,7 +229,8 @@ BasicType bt, int adr_idx, MemNode::MemOrd mo, - bool require_atomic_access = false); + bool require_atomic_access = false, + bool mismatched = false); // Store a card mark ordered after store_oop Node* storeCM(Node* ctl, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/ifnode.cpp --- a/hotspot/src/share/vm/opto/ifnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/ifnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "ci/ciTypeFlow.hpp" #include "memory/allocation.inline.hpp" #include "opto/addnode.hpp" #include "opto/castnode.hpp" @@ -305,12 +306,16 @@ Node *b_c = phase->transform(new BoolNode(cmp_c,b->_test._test)); Node *b_x = phase->transform(new BoolNode(cmp_x,b->_test._test)); // Make the IfNode - IfNode *iff_c = new IfNode(region_c,b_c,iff->_prob,iff->_fcnt); + IfNode* iff_c = iff->clone()->as_If(); + iff_c->set_req(0, region_c); + iff_c->set_req(1, b_c); igvn->set_type_bottom(iff_c); igvn->_worklist.push(iff_c); hook->init_req(2, iff_c); - IfNode *iff_x = new IfNode(region_x,b_x,iff->_prob, iff->_fcnt); + IfNode* iff_x = iff->clone()->as_If(); + iff_x->set_req(0, region_x); + iff_x->set_req(1, b_x); igvn->set_type_bottom(iff_x); igvn->_worklist.push(iff_x); hook->init_req(3, iff_x); @@ -480,7 +485,7 @@ return NULL; } if (l->is_top()) return NULL; // Top input means dead test - if (r->Opcode() != Op_LoadRange) return NULL; + if (r->Opcode() != Op_LoadRange && !is_RangeCheck()) return NULL; // We have recognized one of these forms: // Flip 1: If 
(Bool[<] CmpU(l, LoadRange)) ... @@ -495,7 +500,7 @@ // Return 0 if not a range check. Return 1 if a range check and set index and // offset. Return 2 if we had to negate the test. Index is NULL if the check // is versus a constant. -int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) { +int RangeCheckNode::is_range_check(Node* &range, Node* &index, jint &offset) { int flip_test = 0; Node* l = NULL; Node* r = NULL; @@ -520,9 +525,9 @@ return 0; } else if (l->Opcode() == Op_AddI) { if ((off = l->in(1)->find_int_con(0)) != 0) { - ind = l->in(2); + ind = l->in(2)->uncast(); } else if ((off = l->in(2)->find_int_con(0)) != 0) { - ind = l->in(1); + ind = l->in(1)->uncast(); } } else if ((off = l->find_int_con(-1)) >= 0) { // constant offset with no variable index @@ -723,7 +728,7 @@ return ctrl != NULL && ctrl->is_Proj() && ctrl->in(0) != NULL && - ctrl->in(0)->is_If() && + ctrl->in(0)->Opcode() == Op_If && ctrl->in(0)->outcnt() == 2 && ctrl->in(0)->as_If()->cmpi_folds(igvn) && // Must compare same value @@ -771,6 +776,11 @@ CallStaticJavaNode* dom_unc = otherproj->is_uncommon_trap_proj(Deoptimization::Reason_none); if (otherproj->outcnt() == 1 && dom_unc != NULL) { + // We need to re-execute the folded Ifs after deoptimization from the merged traps + if (!dom_unc->jvms()->should_reexecute()) { + return false; + } + CallStaticJavaNode* unc = NULL; ProjNode* unc_proj = uncommon_trap_proj(unc); if (unc_proj != NULL && unc_proj->outcnt() == 1) { @@ -784,12 +794,41 @@ } else if (dom_unc->in(0) != otherproj || unc->in(0) != unc_proj) { return false; } + + // Different methods and methods containing jsrs are not supported. 
+ ciMethod* method = unc->jvms()->method(); + ciMethod* dom_method = dom_unc->jvms()->method(); + if (method != dom_method || method->has_jsrs()) { + return false; + } + // Check that both traps are in the same activation of the method (instead + // of two activations being inlined through different call sites) by verifying + // that the call stacks are equal for both JVMStates. + JVMState* dom_caller = dom_unc->jvms()->caller(); + JVMState* caller = unc->jvms()->caller(); + if ((dom_caller == NULL) != (caller == NULL)) { + // The current method must either be inlined into both dom_caller and + // caller or must not be inlined at all (top method). Bail out otherwise. + return false; + } else if (dom_caller != NULL && !dom_caller->same_calls_as(caller)) { + return false; + } + // Check that the bci of the dominating uncommon trap dominates the bci + // of the dominated uncommon trap. Otherwise we may not re-execute + // the dominated check after deoptimization from the merged uncommon trap. + ciTypeFlow* flow = dom_method->get_flow_analysis(); + int bci = unc->jvms()->bci(); + int dom_bci = dom_unc->jvms()->bci(); + if (!flow->is_dominated_by(bci, dom_bci)) { + return false; + } + // See merge_uncommon_traps: the reason of the uncommon trap // will be changed and the state of the dominating If will be // used. 
Checked that we didn't apply this transformation in a // previous compilation and it didn't cause too many traps - if (!igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_unstable_fused_if) && - !igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_range_check)) { + if (!igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_unstable_fused_if) && + !igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_range_check)) { success = unc_proj; fail = unc_proj->other_if_proj(); return true; @@ -941,8 +980,8 @@ if (failtype->_lo > failtype->_hi) { // previous if determines the result of this if so // replace Bool with constant - igvn->hash_delete(this); - set_req(1, igvn->intcon(success->_con)); + igvn->_worklist.push(in(1)); + igvn->replace_input_of(this, 1, igvn->intcon(success->_con)); return true; } } @@ -961,7 +1000,8 @@ Node* newbool = igvn->transform(new BoolNode(newcmp, cond)); igvn->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con)); - set_req(1, newbool); + igvn->_worklist.push(in(1)); + igvn->replace_input_of(this, 1, newbool); return true; } @@ -971,7 +1011,10 @@ // Merge the branches that trap for this If and the dominating If into // a single region that branches to the uncommon trap for the // dominating If -void IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) { +Node* IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) { + Node* res = this; + assert(success->in(0) == this, "bad projection"); + ProjNode* otherproj = proj->other_if_proj(); CallStaticJavaNode* unc = success->is_uncommon_trap_proj(Deoptimization::Reason_none); @@ -1007,6 +1050,8 @@ trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_range_check, action); improve_address_types(l, r, fail, igvn); + + res = igvn->transform(new RangeCheckNode(in(0), in(1), 
_prob, _fcnt)); } else if (unc != dom_unc) { // If we trap we won't know what CmpI would have caused the trap // so use a special trap reason to mark this pair of CmpI nodes as @@ -1016,6 +1061,7 @@ trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_unstable_fused_if, action); } igvn->replace_input_of(dom_unc, TypeFunc::Parms, igvn->intcon(trap_request)); + return res; } // If we are turning 2 CmpI nodes into a CmpU that follows the pattern @@ -1209,8 +1255,7 @@ if (has_only_uncommon_traps(dom_cmp, success, fail, igvn) && // Next call modifies graph so must be last fold_compares_helper(dom_cmp, success, fail, igvn)) { - merge_uncommon_traps(dom_cmp, success, fail, igvn); - return this; + return merge_uncommon_traps(dom_cmp, success, fail, igvn); } return NULL; } else if (ctrl->in(0) != NULL && @@ -1229,8 +1274,7 @@ // Next call modifies graph so must be last fold_compares_helper(dom_cmp, success, fail, igvn)) { reroute_side_effect_free_unc(other_cmp, dom_cmp, igvn); - merge_uncommon_traps(dom_cmp, success, fail, igvn); - return this; + return merge_uncommon_traps(dom_cmp, success, fail, igvn); } } } @@ -1311,14 +1355,10 @@ jint off; }; -//------------------------------Ideal------------------------------------------ -// Return a node which is more "ideal" than the current node. Strip out -// control copies -Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) { +Node* IfNode::Ideal_common(PhaseGVN *phase, bool can_reshape) { if (remove_dead_region(phase, can_reshape)) return this; // No Def-Use info? 
if (!can_reshape) return NULL; - PhaseIterGVN *igvn = phase->is_IterGVN(); // Don't bother trying to transform a dead if if (in(0)->is_top()) return NULL; @@ -1334,24 +1374,291 @@ if (idt_if != NULL) return idt_if; // Try to split the IF + PhaseIterGVN *igvn = phase->is_IterGVN(); Node *s = split_if(this, igvn); if (s != NULL) return s; + return NodeSentinel; +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. Strip out +// control copies +Node* IfNode::Ideal(PhaseGVN *phase, bool can_reshape) { + Node* res = Ideal_common(phase, can_reshape); + if (res != NodeSentinel) { + return res; + } + // Check for people making a useless boolean: things like // if( (x < y ? true : false) ) { ... } // Replace with if( x < y ) { ... } Node *bol2 = remove_useless_bool(this, phase); if( bol2 ) return bol2; + if (in(0) == NULL) return NULL; // Dead loop? + + PhaseIterGVN *igvn = phase->is_IterGVN(); + Node* result = fold_compares(igvn); + if (result != NULL) { + return result; + } + + // Scan for an equivalent test + Node *cmp; + int dist = 0; // Cutoff limit for search + int op = Opcode(); + if( op == Op_If && + (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) { + if( cmp->in(2) != NULL && // make sure cmp is not already dead + cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) { + dist = 64; // Limit for null-pointer scans + } else { + dist = 4; // Do not bother for random pointer tests + } + } else { + dist = 4; // Limit for random junky scans + } + + Node* prev_dom = search_identical(dist); + + if (prev_dom == NULL) { + return NULL; + } + + // Replace dominated IfNode + return dominated_by(prev_dom, igvn); +} + +//------------------------------dominated_by----------------------------------- +Node* IfNode::dominated_by(Node* prev_dom, PhaseIterGVN *igvn) { +#ifndef PRODUCT + if (TraceIterativeGVN) { + tty->print(" Removing IfNode: "); this->dump(); + } + if (VerifyOpto && 
!igvn->allow_progress()) { + // Found an equivalent dominating test, + // we can not guarantee reaching a fix-point for these during iterativeGVN + // since intervening nodes may not change. + return NULL; + } +#endif + + igvn->hash_delete(this); // Remove self to prevent spurious V-N + Node *idom = in(0); + // Need opcode to decide which way 'this' test goes + int prev_op = prev_dom->Opcode(); + Node *top = igvn->C->top(); // Shortcut to top + + // Loop predicates may have depending checks which should not + // be skipped. For example, range check predicate has two checks + // for lower and upper bounds. + ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj(); + if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL) + prev_dom = idom; + + // Now walk the current IfNode's projections. + // Loop ends when 'this' has no more uses. + for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) { + Node *ifp = last_out(i); // Get IfTrue/IfFalse + igvn->add_users_to_worklist(ifp); + // Check which projection it is and set target. + // Data-target is either the dominating projection of the same type + // or TOP if the dominating projection is of opposite type. + // Data-target will be used as the new control edge for the non-CFG + // nodes like Casts and Loads. + Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top; + // Control-target is just the If's immediate dominator or TOP. + Node *ctrl_target = (ifp->Opcode() == prev_op) ? idom : top; + + // For each child of an IfTrue/IfFalse projection, reroute. + // Loop ends when projection has no more uses. + for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) { + Node* s = ifp->last_out(j); // Get child of IfTrue/IfFalse + if( !s->depends_only_on_test() ) { + // Find the control input matching this def-use edge. + // For Regions it may not be in slot 0. 
+ uint l; + for( l = 0; s->in(l) != ifp; l++ ) { } + igvn->replace_input_of(s, l, ctrl_target); + } else { // Else, for control producers, + igvn->replace_input_of(s, 0, data_target); // Move child to data-target + } + } // End for each child of a projection + + igvn->remove_dead_node(ifp); + } // End for each IfTrue/IfFalse child of If + + // Kill the IfNode + igvn->remove_dead_node(this); + + // Must return either the original node (now dead) or a new node + // (Do not return a top here, since that would break the uniqueness of top.) + return new ConINode(TypeInt::ZERO); +} + +Node* IfNode::search_identical(int dist) { // Setup to scan up the CFG looking for a dominating test - Node *dom = in(0); - Node *prev_dom = this; + Node* dom = in(0); + Node* prev_dom = this; + int op = Opcode(); + // Search up the dominator tree for an If with an identical test + while( dom->Opcode() != op || // Not same opcode? + dom->in(1) != in(1) || // Not same input 1? + (req() == 3 && dom->in(2) != in(2)) || // Not same input 2? + prev_dom->in(0) != dom ) { // One path of test does not dominate? + if( dist < 0 ) return NULL; + + dist--; + prev_dom = dom; + dom = up_one_dom( dom ); + if( !dom ) return NULL; + } + + // Check that we did not follow a loop back to ourselves + if( this == dom ) + return NULL; + + if( dist > 2 ) // Add to count of NULL checks elided + explicit_null_checks_elided++; + + return prev_dom; +} + +//------------------------------Identity--------------------------------------- +// If the test is constant & we match, then we are the input Control +Node *IfProjNode::Identity(PhaseTransform *phase) { + // Can only optimize if cannot go the other way + const TypeTuple *t = phase->type(in(0))->is_tuple(); + if (t == TypeTuple::IFNEITHER || + // kill dead branch first otherwise the IfNode's control will + // have 2 control uses (the IfNode that doesn't go away because + // it still has uses and this branch of the + // If). 
Node::has_special_unique_user() will cause this node to + // be reprocessed once the dead branch is killed. + (always_taken(t) && in(0)->outcnt() == 1)) { + // IfNode control + return in(0)->in(0); + } + // no progress + return this; +} + +#ifndef PRODUCT +//-------------------------------related--------------------------------------- +// An IfProjNode's related node set consists of its input (an IfNode) including +// the IfNode's condition, plus all of its outputs at level 1. In compact mode, +// the restrictions for IfNode apply (see IfNode::rel). +void IfProjNode::related(GrowableArray *in_rel, GrowableArray *out_rel, bool compact) const { + Node* ifNode = this->in(0); + in_rel->append(ifNode); + if (compact) { + ifNode->collect_nodes(in_rel, 3, false, true); + } else { + ifNode->collect_nodes_in_all_data(in_rel, false); + } + this->collect_nodes(out_rel, -1, false, false); +} + +//------------------------------dump_spec-------------------------------------- +void IfNode::dump_spec(outputStream *st) const { + st->print("P=%f, C=%f",_prob,_fcnt); +} + +//-------------------------------related--------------------------------------- +// For an IfNode, the set of related output nodes is just the output nodes till +// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer. +// The related input nodes contain no control nodes, but all data nodes +// pertaining to the condition. In compact mode, the input nodes are collected +// up to a depth of 3. +void IfNode::related(GrowableArray *in_rel, GrowableArray *out_rel, bool compact) const { + if (compact) { + this->collect_nodes(in_rel, 3, false, true); + } else { + this->collect_nodes_in_all_data(in_rel, false); + } + this->collect_nodes(out_rel, -2, false, false); +} +#endif + +//------------------------------idealize_test---------------------------------- +// Try to canonicalize tests better. Peek at the Cmp/Bool/If sequence and +// come up with a canonical sequence. 
Bools getting 'eq', 'gt' and 'ge' forms +// converted to 'ne', 'le' and 'lt' forms. IfTrue/IfFalse get swapped as +// needed. +static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) { + assert(iff->in(0) != NULL, "If must be live"); + + if (iff->outcnt() != 2) return NULL; // Malformed projections. + Node* old_if_f = iff->proj_out(false); + Node* old_if_t = iff->proj_out(true); + + // CountedLoopEnds want the back-control test to be TRUE, irregardless of + // whether they are testing a 'gt' or 'lt' condition. The 'gt' condition + // happens in count-down loops + if (iff->is_CountedLoopEnd()) return NULL; + if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests + BoolNode *b = iff->in(1)->as_Bool(); + BoolTest bt = b->_test; + // Test already in good order? + if( bt.is_canonical() ) + return NULL; + + // Flip test to be canonical. Requires flipping the IfFalse/IfTrue and + // cloning the IfNode. + Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) ); + if( !new_b->is_Bool() ) return NULL; + b = new_b->as_Bool(); + + PhaseIterGVN *igvn = phase->is_IterGVN(); + assert( igvn, "Test is not canonical in parser?" ); + + // The IF node never really changes, but it needs to be cloned + iff = iff->clone()->as_If(); + iff->set_req(1, b); + iff->_prob = 1.0-iff->_prob; + + Node *prior = igvn->hash_find_insert(iff); + if( prior ) { + igvn->remove_dead_node(iff); + iff = (IfNode*)prior; + } else { + // Cannot call transform on it just yet + igvn->set_type_bottom(iff); + } + igvn->_worklist.push(iff); + + // Now handle projections. Cloning not required. 
+ Node* new_if_f = (Node*)(new IfFalseNode( iff )); + Node* new_if_t = (Node*)(new IfTrueNode ( iff )); + + igvn->register_new_node_with_optimizer(new_if_f); + igvn->register_new_node_with_optimizer(new_if_t); + // Flip test, so flip trailing control + igvn->replace_node(old_if_f, new_if_t); + igvn->replace_node(old_if_t, new_if_f); + + // Progress + return iff; +} + +Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) { + Node* res = Ideal_common(phase, can_reshape); + if (res != NodeSentinel) { + return res; + } + + PhaseIterGVN *igvn = phase->is_IterGVN(); + // Setup to scan up the CFG looking for a dominating test + Node* prev_dom = this; // Check for range-check vs other kinds of tests - Node *index1, *range1; + Node* index1; + Node* range1; jint offset1; int flip1 = is_range_check(range1, index1, offset1); - if( flip1 ) { + if (flip1) { + Node* dom = in(0); // Try to remove extra range checks. All 'up_one_dom' gives up at merges // so all checks we inspect post-dominate the top-most check we find. // If we are going to fail the current check and we reach the top check @@ -1372,13 +1679,14 @@ // Scan for the top checks and collect range of offsets for (int dist = 0; dist < 999; dist++) { // Range-Check scan limit - if (dom->Opcode() == Op_If && // Not same opcode? + if (dom->Opcode() == Op_RangeCheck && // Not same opcode? prev_dom->in(0) == dom) { // One path of test does dominate? if (dom == this) return NULL; // dead loop // See if this is a range check - Node *index2, *range2; + Node* index2; + Node* range2; jint offset2; - int flip2 = dom->as_If()->is_range_check(range2, index2, offset2); + int flip2 = dom->as_RangeCheck()->is_range_check(range2, index2, offset2); // See if this is a _matching_ range check, checking against // the same array bounds. 
if (flip2 == flip1 && range2 == range1 && index2 == index1 && @@ -1486,237 +1794,14 @@ prev_dom = rc0.ctl; } } - - } else { // Scan for an equivalent test - - Node *cmp; - int dist = 0; // Cutoff limit for search - int op = Opcode(); - if( op == Op_If && - (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) { - if( cmp->in(2) != NULL && // make sure cmp is not already dead - cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) { - dist = 64; // Limit for null-pointer scans - } else { - dist = 4; // Do not bother for random pointer tests - } - } else { - dist = 4; // Limit for random junky scans - } - - // Normal equivalent-test check. - if( !dom ) return NULL; // Dead loop? - - Node* result = fold_compares(igvn); - if (result != NULL) { - return result; - } + } else { + prev_dom = search_identical(4); - // Search up the dominator tree for an If with an identical test - while( dom->Opcode() != op || // Not same opcode? - dom->in(1) != in(1) || // Not same input 1? - (req() == 3 && dom->in(2) != in(2)) || // Not same input 2? - prev_dom->in(0) != dom ) { // One path of test does not dominate? - if( dist < 0 ) return NULL; - - dist--; - prev_dom = dom; - dom = up_one_dom( dom ); - if( !dom ) return NULL; - } - - // Check that we did not follow a loop back to ourselves - if( this == dom ) + if (prev_dom == NULL) { return NULL; - - if( dist > 2 ) // Add to count of NULL checks elided - explicit_null_checks_elided++; - - } // End of Else scan for an equivalent test - - // Hit! Remove this IF -#ifndef PRODUCT - if( TraceIterativeGVN ) { - tty->print(" Removing IfNode: "); this->dump(); + } } - if( VerifyOpto && !phase->allow_progress() ) { - // Found an equivalent dominating test, - // we can not guarantee reaching a fix-point for these during iterativeGVN - // since intervening nodes may not change. 
- return NULL; - } -#endif // Replace dominated IfNode - dominated_by( prev_dom, igvn ); - - // Must return either the original node (now dead) or a new node - // (Do not return a top here, since that would break the uniqueness of top.) - return new ConINode(TypeInt::ZERO); -} - -//------------------------------dominated_by----------------------------------- -void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) { - igvn->hash_delete(this); // Remove self to prevent spurious V-N - Node *idom = in(0); - // Need opcode to decide which way 'this' test goes - int prev_op = prev_dom->Opcode(); - Node *top = igvn->C->top(); // Shortcut to top - - // Loop predicates may have depending checks which should not - // be skipped. For example, range check predicate has two checks - // for lower and upper bounds. - ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj(); - if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL) - prev_dom = idom; - - // Now walk the current IfNode's projections. - // Loop ends when 'this' has no more uses. - for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) { - Node *ifp = last_out(i); // Get IfTrue/IfFalse - igvn->add_users_to_worklist(ifp); - // Check which projection it is and set target. - // Data-target is either the dominating projection of the same type - // or TOP if the dominating projection is of opposite type. - // Data-target will be used as the new control edge for the non-CFG - // nodes like Casts and Loads. - Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top; - // Control-target is just the If's immediate dominator or TOP. - Node *ctrl_target = (ifp->Opcode() == prev_op) ? idom : top; - - // For each child of an IfTrue/IfFalse projection, reroute. - // Loop ends when projection has no more uses. 
- for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) { - Node* s = ifp->last_out(j); // Get child of IfTrue/IfFalse - if( !s->depends_only_on_test() ) { - // Find the control input matching this def-use edge. - // For Regions it may not be in slot 0. - uint l; - for( l = 0; s->in(l) != ifp; l++ ) { } - igvn->replace_input_of(s, l, ctrl_target); - } else { // Else, for control producers, - igvn->replace_input_of(s, 0, data_target); // Move child to data-target - } - } // End for each child of a projection - - igvn->remove_dead_node(ifp); - } // End for each IfTrue/IfFalse child of If - - // Kill the IfNode - igvn->remove_dead_node(this); -} - -//------------------------------Identity--------------------------------------- -// If the test is constant & we match, then we are the input Control -Node *IfProjNode::Identity(PhaseTransform *phase) { - // Can only optimize if cannot go the other way - const TypeTuple *t = phase->type(in(0))->is_tuple(); - if (t == TypeTuple::IFNEITHER || - // kill dead branch first otherwise the IfNode's control will - // have 2 control uses (the IfNode that doesn't go away because - // it still has uses and this branch of the - // If). Node::has_special_unique_user() will cause this node to - // be reprocessed once the dead branch is killed. - (always_taken(t) && in(0)->outcnt() == 1)) { - // IfNode control - return in(0)->in(0); - } - // no progress - return this; + return dominated_by(prev_dom, igvn); } - -#ifndef PRODUCT -//-------------------------------related--------------------------------------- -// An IfProjNode's related node set consists of its input (an IfNode) including -// the IfNode's condition, plus all of its outputs at level 1. In compact mode, -// the restrictions for IfNode apply (see IfNode::rel). 
-void IfProjNode::related(GrowableArray *in_rel, GrowableArray *out_rel, bool compact) const { - Node* ifNode = this->in(0); - in_rel->append(ifNode); - if (compact) { - ifNode->collect_nodes(in_rel, 3, false, true); - } else { - ifNode->collect_nodes_in_all_data(in_rel, false); - } - this->collect_nodes(out_rel, -1, false, false); -} - -//------------------------------dump_spec-------------------------------------- -void IfNode::dump_spec(outputStream *st) const { - st->print("P=%f, C=%f",_prob,_fcnt); -} - -//-------------------------------related--------------------------------------- -// For an IfNode, the set of related output nodes is just the output nodes till -// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer. -// The related input nodes contain no control nodes, but all data nodes -// pertaining to the condition. In compact mode, the input nodes are collected -// up to a depth of 3. -void IfNode::related(GrowableArray *in_rel, GrowableArray *out_rel, bool compact) const { - if (compact) { - this->collect_nodes(in_rel, 3, false, true); - } else { - this->collect_nodes_in_all_data(in_rel, false); - } - this->collect_nodes(out_rel, -2, false, false); -} -#endif - -//------------------------------idealize_test---------------------------------- -// Try to canonicalize tests better. Peek at the Cmp/Bool/If sequence and -// come up with a canonical sequence. Bools getting 'eq', 'gt' and 'ge' forms -// converted to 'ne', 'le' and 'lt' forms. IfTrue/IfFalse get swapped as -// needed. -static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) { - assert(iff->in(0) != NULL, "If must be live"); - - if (iff->outcnt() != 2) return NULL; // Malformed projections. - Node* old_if_f = iff->proj_out(false); - Node* old_if_t = iff->proj_out(true); - - // CountedLoopEnds want the back-control test to be TRUE, irregardless of - // whether they are testing a 'gt' or 'lt' condition. 
The 'gt' condition - // happens in count-down loops - if (iff->is_CountedLoopEnd()) return NULL; - if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests - BoolNode *b = iff->in(1)->as_Bool(); - BoolTest bt = b->_test; - // Test already in good order? - if( bt.is_canonical() ) - return NULL; - - // Flip test to be canonical. Requires flipping the IfFalse/IfTrue and - // cloning the IfNode. - Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) ); - if( !new_b->is_Bool() ) return NULL; - b = new_b->as_Bool(); - - PhaseIterGVN *igvn = phase->is_IterGVN(); - assert( igvn, "Test is not canonical in parser?" ); - - // The IF node never really changes, but it needs to be cloned - iff = new IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt); - - Node *prior = igvn->hash_find_insert(iff); - if( prior ) { - igvn->remove_dead_node(iff); - iff = (IfNode*)prior; - } else { - // Cannot call transform on it just yet - igvn->set_type_bottom(iff); - } - igvn->_worklist.push(iff); - - // Now handle projections. Cloning not required. - Node* new_if_f = (Node*)(new IfFalseNode( iff )); - Node* new_if_t = (Node*)(new IfTrueNode ( iff )); - - igvn->register_new_node_with_optimizer(new_if_f); - igvn->register_new_node_with_optimizer(new_if_t); - // Flip test, so flip trailing control - igvn->replace_node(old_if_f, new_if_t); - igvn->replace_node(old_if_t, new_if_f); - - // Progress - return iff; -} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/lcm.cpp --- a/hotspot/src/share/vm/opto/lcm.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/lcm.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -498,9 +498,13 @@ continue; // Schedule IV increment last. - if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd && - e->in(1)->in(1) == n && n->is_iteratively_computed()) - continue; + if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd) { + // Cmp might be matched into CountedLoopEnd node. 
+ Node *cmp = (e->in(1)->ideal_reg() == Op_RegFlags) ? e->in(1) : e; + if (cmp->req() > 1 && cmp->in(1) == n && n->is_iteratively_computed()) { + continue; + } + } uint n_choice = 2; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/library_call.cpp --- a/hotspot/src/share/vm/opto/library_call.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/library_call.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -238,7 +238,7 @@ // Generates the guards that check whether the result of // Unsafe.getObject should be recorded in an SATB log buffer. void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); - bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile); + bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned); static bool klass_needs_init_guard(Node* kls); bool inline_unsafe_allocate(); bool inline_unsafe_copyMemory(); @@ -256,6 +256,7 @@ bool inline_native_getLength(); bool inline_array_copyOf(bool is_copyOfRange); bool inline_array_equals(StrIntrinsicNode::ArgEnc ae); + bool inline_objects_checkIndex(); void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark); bool inline_native_clone(bool is_virtual); bool inline_native_Reflection_getCallerClass(); @@ -544,72 +545,72 @@ case vmIntrinsics::_inflateStringC: case vmIntrinsics::_inflateStringB: return inline_string_copy(!is_compress); - case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, !is_volatile); - case vmIntrinsics::_getBoolean: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile); - case vmIntrinsics::_getByte: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, !is_volatile); - case vmIntrinsics::_getShort: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_getChar: return 
inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_getInt: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile); - case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile); - case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile); - case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile); - case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile); - case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile); - case vmIntrinsics::_putShort: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_putChar: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_putInt: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile); - case vmIntrinsics::_putLong: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile); - case vmIntrinsics::_putFloat: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, !is_volatile); - case vmIntrinsics::_putDouble: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, !is_volatile); - - case vmIntrinsics::_getByte_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE, !is_volatile); - case vmIntrinsics::_getShort_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_getChar_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_getInt_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_INT, !is_volatile); - case vmIntrinsics::_getLong_raw: return 
inline_unsafe_access( is_native_ptr, !is_store, T_LONG, !is_volatile); - case vmIntrinsics::_getFloat_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT, !is_volatile); - case vmIntrinsics::_getDouble_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_getAddress_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile); - - case vmIntrinsics::_putByte_raw: return inline_unsafe_access( is_native_ptr, is_store, T_BYTE, !is_volatile); - case vmIntrinsics::_putShort_raw: return inline_unsafe_access( is_native_ptr, is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_putChar_raw: return inline_unsafe_access( is_native_ptr, is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_putInt_raw: return inline_unsafe_access( is_native_ptr, is_store, T_INT, !is_volatile); - case vmIntrinsics::_putLong_raw: return inline_unsafe_access( is_native_ptr, is_store, T_LONG, !is_volatile); - case vmIntrinsics::_putFloat_raw: return inline_unsafe_access( is_native_ptr, is_store, T_FLOAT, !is_volatile); - case vmIntrinsics::_putDouble_raw: return inline_unsafe_access( is_native_ptr, is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_putAddress_raw: return inline_unsafe_access( is_native_ptr, is_store, T_ADDRESS, !is_volatile); - - case vmIntrinsics::_getObjectVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, is_volatile); - case vmIntrinsics::_getBooleanVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, is_volatile); - case vmIntrinsics::_getByteVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, is_volatile); - case vmIntrinsics::_getShortVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, is_volatile); - case vmIntrinsics::_getCharVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, is_volatile); - case vmIntrinsics::_getIntVolatile: return 
inline_unsafe_access(!is_native_ptr, !is_store, T_INT, is_volatile); - case vmIntrinsics::_getLongVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, is_volatile); - case vmIntrinsics::_getFloatVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, is_volatile); - case vmIntrinsics::_getDoubleVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, is_volatile); - - case vmIntrinsics::_putObjectVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, is_volatile); - case vmIntrinsics::_putBooleanVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, is_volatile); - case vmIntrinsics::_putByteVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, is_volatile); - case vmIntrinsics::_putShortVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, is_volatile); - case vmIntrinsics::_putCharVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, is_volatile); - case vmIntrinsics::_putIntVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, is_volatile); - case vmIntrinsics::_putLongVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, is_volatile); - case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile); - case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile); - - case vmIntrinsics::_getShortUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_getCharUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_getIntUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile); - case vmIntrinsics::_getLongUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile); - - case vmIntrinsics::_putShortUnaligned: return 
inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile); - case vmIntrinsics::_putCharUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile); - case vmIntrinsics::_putIntUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile); - case vmIntrinsics::_putLongUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile); + case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, !is_volatile, false); + case vmIntrinsics::_getBoolean: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false); + case vmIntrinsics::_getByte: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, !is_volatile, false); + case vmIntrinsics::_getShort: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile, false); + case vmIntrinsics::_getChar: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile, false); + case vmIntrinsics::_getInt: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile, false); + case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile, false); + case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile, false); + case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile, false); + case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile, false); + case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile, false); + case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile, false); + case vmIntrinsics::_putShort: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile, false); + case vmIntrinsics::_putChar: return 
inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile, false); + case vmIntrinsics::_putInt: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile, false); + case vmIntrinsics::_putLong: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile, false); + case vmIntrinsics::_putFloat: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, !is_volatile, false); + case vmIntrinsics::_putDouble: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, !is_volatile, false); + + case vmIntrinsics::_getByte_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE, !is_volatile, false); + case vmIntrinsics::_getShort_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT, !is_volatile, false); + case vmIntrinsics::_getChar_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR, !is_volatile, false); + case vmIntrinsics::_getInt_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_INT, !is_volatile, false); + case vmIntrinsics::_getLong_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_LONG, !is_volatile, false); + case vmIntrinsics::_getFloat_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT, !is_volatile, false); + case vmIntrinsics::_getDouble_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE, !is_volatile, false); + case vmIntrinsics::_getAddress_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false); + + case vmIntrinsics::_putByte_raw: return inline_unsafe_access( is_native_ptr, is_store, T_BYTE, !is_volatile, false); + case vmIntrinsics::_putShort_raw: return inline_unsafe_access( is_native_ptr, is_store, T_SHORT, !is_volatile, false); + case vmIntrinsics::_putChar_raw: return inline_unsafe_access( is_native_ptr, is_store, T_CHAR, !is_volatile, false); + case vmIntrinsics::_putInt_raw: return inline_unsafe_access( is_native_ptr, is_store, T_INT, !is_volatile, false); + case 
vmIntrinsics::_putLong_raw: return inline_unsafe_access( is_native_ptr, is_store, T_LONG, !is_volatile, false); + case vmIntrinsics::_putFloat_raw: return inline_unsafe_access( is_native_ptr, is_store, T_FLOAT, !is_volatile, false); + case vmIntrinsics::_putDouble_raw: return inline_unsafe_access( is_native_ptr, is_store, T_DOUBLE, !is_volatile, false); + case vmIntrinsics::_putAddress_raw: return inline_unsafe_access( is_native_ptr, is_store, T_ADDRESS, !is_volatile, false); + + case vmIntrinsics::_getObjectVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, is_volatile, false); + case vmIntrinsics::_getBooleanVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, is_volatile, false); + case vmIntrinsics::_getByteVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, is_volatile, false); + case vmIntrinsics::_getShortVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, is_volatile, false); + case vmIntrinsics::_getCharVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, is_volatile, false); + case vmIntrinsics::_getIntVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, is_volatile, false); + case vmIntrinsics::_getLongVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, is_volatile, false); + case vmIntrinsics::_getFloatVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, is_volatile, false); + case vmIntrinsics::_getDoubleVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, is_volatile, false); + + case vmIntrinsics::_putObjectVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, is_volatile, false); + case vmIntrinsics::_putBooleanVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, is_volatile, false); + case vmIntrinsics::_putByteVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, is_volatile, false); + case 
vmIntrinsics::_putShortVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, is_volatile, false); + case vmIntrinsics::_putCharVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, is_volatile, false); + case vmIntrinsics::_putIntVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, is_volatile, false); + case vmIntrinsics::_putLongVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, is_volatile, false); + case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile, false); + case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile, false); + + case vmIntrinsics::_getShortUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile, true); + case vmIntrinsics::_getCharUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile, true); + case vmIntrinsics::_getIntUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile, true); + case vmIntrinsics::_getLongUnaligned: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile, true); + + case vmIntrinsics::_putShortUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile, true); + case vmIntrinsics::_putCharUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile, true); + case vmIntrinsics::_putIntUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile, true); + case vmIntrinsics::_putLongUnaligned: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile, true); case vmIntrinsics::_compareAndSwapObject: return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); case vmIntrinsics::_compareAndSwapInt: return inline_unsafe_load_store(T_INT, LS_cmpxchg); @@ -647,6 +648,7 @@ case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true); 
case vmIntrinsics::_equalsB: return inline_array_equals(StrIntrinsicNode::LL); case vmIntrinsics::_equalsC: return inline_array_equals(StrIntrinsicNode::UU); + case vmIntrinsics::_Objects_checkIndex: return inline_objects_checkIndex(); case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual()); case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check(); @@ -1045,6 +1047,54 @@ return true; } +bool LibraryCallKit::inline_objects_checkIndex() { + Node* index = argument(0); + Node* length = argument(1); + if (too_many_traps(Deoptimization::Reason_intrinsic) || too_many_traps(Deoptimization::Reason_range_check)) { + return false; + } + + Node* len_pos_cmp = _gvn.transform(new CmpINode(length, intcon(0))); + Node* len_pos_bol = _gvn.transform(new BoolNode(len_pos_cmp, BoolTest::ge)); + + { + BuildCutout unless(this, len_pos_bol, PROB_MAX); + uncommon_trap(Deoptimization::Reason_intrinsic, + Deoptimization::Action_make_not_entrant); + } + + if (stopped()) { + return false; + } + + Node* rc_cmp = _gvn.transform(new CmpUNode(index, length)); + BoolTest::mask btest = BoolTest::lt; + Node* rc_bool = _gvn.transform(new BoolNode(rc_cmp, btest)); + RangeCheckNode* rc = new RangeCheckNode(control(), rc_bool, PROB_MAX, COUNT_UNKNOWN); + _gvn.set_type(rc, rc->Value(&_gvn)); + if (!rc_bool->is_Con()) { + record_for_igvn(rc); + } + set_control(_gvn.transform(new IfTrueNode(rc))); + { + PreserveJVMState pjvms(this); + set_control(_gvn.transform(new IfFalseNode(rc))); + uncommon_trap(Deoptimization::Reason_range_check, + Deoptimization::Action_make_not_entrant); + } + + if (stopped()) { + return false; + } + + Node* result = new CastIINode(index, TypeInt::make(0, _gvn.type(length)->is_int()->_hi, Type::WidenMax)); + result->set_req(0, control()); + result = _gvn.transform(result); + set_result(result); + replace_in_map(index, result); + return true; +} + //------------------------------inline_string_indexOf------------------------ bool 
LibraryCallKit::inline_string_indexOf(StrIntrinsicNode::ArgEnc ae) { if (!Matcher::has_match_rule(Op_StrIndexOf) || !UseSSE42Intrinsics) { @@ -1453,9 +1503,11 @@ Node* adr = array_element_address(value, index, T_CHAR); if (is_store) { - (void) store_to_memory(control(), adr, ch, T_CHAR, TypeAryPtr::BYTES, MemNode::unordered); + (void) store_to_memory(control(), adr, ch, T_CHAR, TypeAryPtr::BYTES, MemNode::unordered, + false, false, true /* mismatched */); } else { - ch = make_load(control(), adr, TypeInt::CHAR, T_CHAR, MemNode::unordered); + ch = make_load(control(), adr, TypeInt::CHAR, T_CHAR, MemNode::unordered, + LoadNode::DependsOnlyOnTest, false, false, true /* mismatched */); set_result(ch); } return true; @@ -2385,7 +2437,7 @@ return NULL; } -bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) { +bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) { if (callee()->is_static()) return false; // caller must have the capability! #ifndef PRODUCT @@ -2527,7 +2579,28 @@ // of safe & unsafe memory. if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - if (!is_store) { + assert(alias_type->adr_type() == TypeRawPtr::BOTTOM || alias_type->adr_type() == TypeOopPtr::BOTTOM || + alias_type->field() != NULL || alias_type->element() != NULL, "field, array element or unknown"); + bool mismatched = false; + if (alias_type->element() != NULL || alias_type->field() != NULL) { + BasicType bt; + if (alias_type->element() != NULL) { + const Type* element = alias_type->element(); + bt = element->isa_narrowoop() ? 
T_OBJECT : element->array_element_basic_type(); + } else { + bt = alias_type->field()->type()->basic_type(); + } + if (bt == T_ARRAY) { + // accessing an array field with getObject is not a mismatch + bt = T_OBJECT; + } + if (bt != type) { + mismatched = true; + } + } + assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type"); + + if (!is_store) { Node* p = NULL; // Try to constant fold a load from a constant field ciField* field = alias_type->field(); @@ -2543,7 +2616,7 @@ MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered; // To be valid, unsafe loads may depend on other conditions than // the one that guards them: pin the Load node - p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile); + p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched); // load value switch (type) { case T_BOOLEAN: @@ -2590,12 +2663,12 @@ MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered; if (type != T_OBJECT ) { - (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile); + (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched); } else { // Possibly an oop being stored to Java heap or native memory if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) { // oop to Java heap. - (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo); + (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); } else { // We can't tell at compile time if we are storing in the Java heap or outside // of it. So we need to emit code to conditionally do the proper type of @@ -2607,11 +2680,11 @@ __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); { // Sync IdealKit and graphKit. 
sync_kit(ideal); - Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo); + Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); // Update IdealKit memory. __ sync_kit(this); } __ else_(); { - __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile); + __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile, mismatched); } __ end_if(); // Final sync IdealKit and GraphKit. final_sync(ideal); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopPredicate.cpp --- a/hotspot/src/share/vm/opto/loopPredicate.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/loopPredicate.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -91,7 +91,8 @@ // The true projecttion (if_cont) of the new_iff is returned. // This code is also used to clone predicates to cloned loops. ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, - Deoptimization::DeoptReason reason) { + Deoptimization::DeoptReason reason, + int opcode) { assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!"); IfNode* iff = cont_proj->in(0)->as_If(); @@ -133,8 +134,13 @@ } // Create new_iff IdealLoopTree* lp = get_loop(entry); - IfNode *new_iff = iff->clone()->as_If(); - new_iff->set_req(0, entry); + IfNode* new_iff = NULL; + if (opcode == Op_If) { + new_iff = new IfNode(entry, iff->in(1), iff->_prob, iff->_fcnt); + } else { + assert(opcode == Op_RangeCheck, "no other if variant here"); + new_iff = new RangeCheckNode(entry, iff->in(1), iff->_prob, iff->_fcnt); + } register_control(new_iff, lp, entry); Node *if_cont = new IfTrueNode(new_iff); Node *if_uct = new IfFalseNode(new_iff); @@ -183,7 +189,8 @@ //------------------------------create_new_if_for_predicate------------------------ // Create a new if below new_entry for the predicate to be cloned (IGVN optimization) ProjNode* 
PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, - Deoptimization::DeoptReason reason) { + Deoptimization::DeoptReason reason, + int opcode) { assert(new_entry != 0, "only used for clone predicate"); assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!"); IfNode* iff = cont_proj->in(0)->as_If(); @@ -208,8 +215,13 @@ } // Create new_iff in new location. - IfNode *new_iff = iff->clone()->as_If(); - new_iff->set_req(0, new_entry); + IfNode* new_iff = NULL; + if (opcode == Op_If) { + new_iff = new IfNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt); + } else { + assert(opcode == Op_RangeCheck, "no other if variant here"); + new_iff = new RangeCheckNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt); + } register_new_node_with_optimizer(new_iff); Node *if_cont = new IfTrueNode(new_iff); @@ -249,9 +261,9 @@ PhaseIterGVN* igvn) { ProjNode* new_predicate_proj; if (loop_phase != NULL) { - new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason); + new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If); } else { - new_predicate_proj = igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason); + new_predicate_proj = igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If); } IfNode* iff = new_predicate_proj->in(0)->as_If(); Node* ctrl = iff->in(0); @@ -557,7 +569,7 @@ return false; } Node* range = cmp->in(2); - if (range->Opcode() != Op_LoadRange) { + if (range->Opcode() != Op_LoadRange && !iff->is_RangeCheck()) { const TypeInt* tint = phase->_igvn.type(range)->isa_int(); if (tint == NULL || tint->empty() || tint->_lo < 0) { // Allow predication on positive values that aren't LoadRanges. @@ -714,7 +726,8 @@ while (current_proj != head) { if (loop == get_loop(current_proj) && // still in the loop ? current_proj->is_Proj() && // is a projection ? 
- current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? + (current_proj->in(0)->Opcode() == Op_If || + current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ? if_proj_list.push(current_proj); } current_proj = idom(current_proj); @@ -753,7 +766,8 @@ if (invar.is_invariant(bol)) { // Invariant test new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL, - Deoptimization::Reason_predicate); + Deoptimization::Reason_predicate, + iff->Opcode()); Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); @@ -797,8 +811,8 @@ // lower_bound test will dominate the upper bound test and all // cloned or created nodes will use the lower bound test as // their declared control. - ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); - ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); + ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode()); + ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode()); assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopTransform.cpp --- a/hotspot/src/share/vm/opto/loopTransform.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/loopTransform.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -290,7 +290,7 @@ if (ctrl->is_top()) return false; // Found dead test on live IF? No peeling! 
// Standard IF only has one input value to check for loop invariance - assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); + assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added"); // Condition is not a member of this loop? if( !is_member(phase->get_loop(ctrl)) && is_loop_exit(test) ) @@ -792,8 +792,10 @@ return false; } - if(cl->do_unroll_only()) { - NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct)); + if (cl->do_unroll_only()) { + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct); + } } // Unroll once! (Each trip will soon do double iterations) @@ -818,7 +820,9 @@ if (slp_max_unroll_factor >= future_unroll_ct) { int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor; if (new_limit > LoopUnrollLimit) { - NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit)); + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit); + } _local_loop_unroll_limit = new_limit; } } @@ -856,7 +860,8 @@ // loop-invariant. for (uint i = 0; i < _body.size(); i++) { Node *iff = _body[i]; - if (iff->Opcode() == Op_If) { // Test? + if (iff->Opcode() == Op_If || + iff->Opcode() == Op_RangeCheck) { // Test? // Comparing trip+off vs limit Node *bol = iff->in(1); @@ -2035,8 +2040,8 @@ // loop-invariant. for( uint i = 0; i < loop->_body.size(); i++ ) { Node *iff = loop->_body[i]; - if( iff->Opcode() == Op_If ) { // Test? - + if (iff->Opcode() == Op_If || + iff->Opcode() == Op_RangeCheck) { // Test? 
// Test is an IfNode, has 2 projections. If BOTH are in the loop // we need loop unswitching instead of iteration splitting. Node *exit = loop->is_loop_exit(iff); @@ -2119,10 +2124,9 @@ conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck; } } else { -#ifndef PRODUCT - if( PrintOpto ) + if (PrintOpto) { tty->print_cr("missed RCE opportunity"); -#endif + } continue; // In release mode, ignore it } } else { // Otherwise work on normal compares @@ -2157,10 +2161,9 @@ } break; default: -#ifndef PRODUCT - if( PrintOpto ) + if (PrintOpto) { tty->print_cr("missed RCE opportunity"); -#endif + } continue; // Unhandled case } } @@ -2504,9 +2507,7 @@ return false; } if (should_peel) { // Should we peel? -#ifndef PRODUCT - if (PrintOpto) tty->print_cr("should_peel"); -#endif + if (PrintOpto) { tty->print_cr("should_peel"); } phase->do_peeling(this,old_new); } else if (should_unswitch) { phase->do_unswitching(this, old_new); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopUnswitch.cpp --- a/hotspot/src/share/vm/opto/loopUnswitch.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -132,7 +132,7 @@ head->as_CountedLoop()->set_normal_loop(); } - ProjNode* proj_true = create_slow_version_of_loop(loop, old_new); + ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode()); #ifdef ASSERT Node* uniqc = proj_true->unique_ctrl_out(); @@ -222,7 +222,8 @@ // and inserting an if to select fast-slow versions. // Return control projection of the entry to the fast version. 
ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, - Node_List &old_new) { + Node_List &old_new, + int opcode) { LoopNode* head = loop->_head->as_Loop(); bool counted_loop = head->is_CountedLoop(); Node* entry = head->in(LoopNode::EntryControl); @@ -235,7 +236,8 @@ register_node(opq, outer_loop, entry, dom_depth(entry)); Node *bol = new Conv2BNode(opq); register_node(bol, outer_loop, entry, dom_depth(entry)); - IfNode* iff = new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN); + IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) : + new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN); register_node(iff, outer_loop, entry, dom_depth(entry)); ProjNode* iffast = new IfTrueNode(iff); register_node(iffast, outer_loop, iff, dom_depth(iff)); @@ -359,16 +361,22 @@ } if(!_lpt->_head->is_CountedLoop()) { - NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);}) + if (TraceLoopOpts) { + tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx); + } return false; } CountedLoopNode *cl = _lpt->_head->as_CountedLoop(); if (!cl->is_valid_counted_loop()) { - NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);}) + if (TraceLoopOpts) { + tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx); + } return false; // skip malformed counted loop } if (!cl->is_main_loop()) { - NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);}) + if (TraceLoopOpts) { + tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx); + } return false; // skip normal, pre, and post loops } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopnode.cpp --- a/hotspot/src/share/vm/opto/loopnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ 
b/hotspot/src/share/vm/opto/loopnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -329,6 +329,9 @@ Node* phi_incr = NULL; // Trip-counter increment must be commutative & associative. + if (incr->Opcode() == Op_CastII) { + incr = incr->in(1); + } if (incr->is_Phi()) { if (incr->as_Phi()->region() != x || incr->req() != 3) return false; // Not simple trip counter expression @@ -356,6 +359,9 @@ xphi = stride; stride = tmp; } + if (xphi->Opcode() == Op_CastII) { + xphi = xphi->in(1); + } // Stride must be constant int stride_con = stride->get_int(); if (stride_con == 0) @@ -2397,11 +2403,9 @@ // After that switch predicates off and do more loop optimizations. if (!C->major_progress() && (C->predicate_count() > 0)) { C->cleanup_loop_predicates(_igvn); -#ifndef PRODUCT if (TraceLoopOpts) { tty->print_cr("PredicatesOff"); } -#endif C->set_major_progress(); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopnode.hpp --- a/hotspot/src/share/vm/opto/loopnode.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/loopnode.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -916,7 +916,8 @@ // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, - Deoptimization::DeoptReason reason); + Deoptimization::DeoptReason reason, + int opcode); void register_control(Node* n, IdealLoopTree *loop, Node* pred); // Clone loop predicates to cloned loops (peeled, unswitched) @@ -966,7 +967,8 @@ // Create a slow version of the loop by cloning the loop // and inserting an if to select fast-slow versions. ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, - Node_List &old_new); + Node_List &old_new, + int opcode); // Clone a loop and return the clone head (clone_loop_head). 
// Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/loopopts.cpp --- a/hotspot/src/share/vm/opto/loopopts.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/loopopts.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -199,14 +199,11 @@ // IGVN worklist for later cleanup. Move control-dependent data Nodes on the // live path up to the dominating control. void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) { -#ifndef PRODUCT - if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test"); -#endif - + if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); } // prevdom is the dominating projection of the dominating test. assert( iff->is_If(), "" ); - assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); + assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added"); int pop = prevdom->Opcode(); assert( pop == Op_IfFalse || pop == Op_IfTrue, "" ); if (flip) { @@ -617,9 +614,7 @@ } } if (phi == NULL) break; -#ifndef PRODUCT - if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); -#endif + if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); } // Move speculative ops for (uint j = 1; j < region->req(); j++) { Node *proj = region->in(j); @@ -963,10 +958,9 @@ } int nodes_left = C->max_node_limit() - C->live_nodes(); if (weight * 8 > nodes_left) { -#ifndef PRODUCT - if (PrintOpto) + if (PrintOpto) { tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight); -#endif + } return true; } else { return false; @@ -1123,7 +1117,8 @@ int n_op = n->Opcode(); // Check for an IF being dominated by another IF same test - if (n_op == Op_If) { + if (n_op == Op_If || + n_op == Op_RangeCheck) { Node *bol = n->in(1); 
uint max = bol->outcnt(); // Check for same test used more than once? @@ -1489,14 +1484,12 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd, Node* side_by_side_idom) { -#ifndef PRODUCT if (C->do_vector_loop() && PrintOpto) { const char* mname = C->method()->name()->as_quoted_ascii(); if (mname != NULL) { tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname); } } -#endif CloneMap& cm = C->clone_map(); Dict* dict = cm.dict(); @@ -1945,7 +1938,10 @@ BoolNode* bol = new BoolNode(cmp, relop); register_node(bol, loop, proj2, ddepth); - IfNode* new_if = new IfNode(proj2, bol, iff->_prob, iff->_fcnt); + int opcode = iff->Opcode(); + assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode"); + IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt): + new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt); register_node(new_if, loop, proj2, ddepth); proj->set_req(0, new_if); // reattach diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/machnode.cpp --- a/hotspot/src/share/vm/opto/machnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/machnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -433,36 +433,49 @@ if (is_MachTemp()) return true; uint r = rule(); // Match rule - if( r < Matcher::_begin_rematerialize || - r >= Matcher::_end_rematerialize ) + if (r < Matcher::_begin_rematerialize || + r >= Matcher::_end_rematerialize) { return false; + } // For 2-address instructions, the input live range is also the output - // live range. Remateralizing does not make progress on the that live range. - if( two_adr() ) return false; + // live range. Remateralizing does not make progress on the that live range. 
+ if (two_adr()) return false; // Check for rematerializing float constants, or not - if( !Matcher::rematerialize_float_constants ) { + if (!Matcher::rematerialize_float_constants) { int op = ideal_Opcode(); - if( op == Op_ConF || op == Op_ConD ) + if (op == Op_ConF || op == Op_ConD) { return false; + } + } + + // Defining flags - can't spill these! Must remateralize. + if (ideal_reg() == Op_RegFlags) { + return true; } - // Defining flags - can't spill these! Must remateralize. - if( ideal_reg() == Op_RegFlags ) - return true; + // Stretching lots of inputs - don't do it. + if (req() > 2) { + return false; + } - // Stretching lots of inputs - don't do it. - if( req() > 2 ) - return false; + if (req() == 2 && in(1) && in(1)->ideal_reg() == Op_RegFlags) { + // In(1) will be rematerialized, too. + // Stretching lots of inputs - don't do it. + if (in(1)->req() > 2) { + return false; + } + } // Don't remateralize somebody with bound inputs - it stretches a // fixed register lifetime. uint idx = oper_input_base(); if (req() > idx) { const RegMask &rm = in_RegMask(idx); - if (rm.is_bound(ideal_reg())) + if (rm.is_bound(ideal_reg())) { return false; + } } return true; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/machnode.hpp --- a/hotspot/src/share/vm/opto/machnode.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/machnode.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -578,8 +578,8 @@ #ifndef PRODUCT - virtual const char *Name() const { - switch (_spill_type) { + static const char *spill_type(SpillType st) { + switch (st) { case TwoAddress: return "TwoAddressSpillCopy"; case PhiInput: @@ -612,6 +612,10 @@ } } + virtual const char *Name() const { + return spill_type(_spill_type); + } + virtual void format( PhaseRegAlloc *, outputStream *st ) const; #endif }; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/matcher.cpp --- a/hotspot/src/share/vm/opto/matcher.cpp Tue Nov 17 16:40:52 2015 -0500 +++ 
b/hotspot/src/share/vm/opto/matcher.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1569,13 +1569,11 @@ // Can NOT include the match of a subtree when its memory state // is used by any of the other subtrees (input_mem == NodeSentinel) ) { -#ifndef PRODUCT // Print when we exclude matching due to different memory states at input-loads - if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel) - && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) { + if (PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel) + && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem)) { tty->print_cr("invalid input_mem"); } -#endif // Switch to a register-only opcode; this value must be in a register // and cannot be subsumed as part of a larger instruction. s->DFA( m->ideal_reg(), m ); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/matcher.hpp --- a/hotspot/src/share/vm/opto/matcher.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/matcher.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -269,6 +269,10 @@ // should generate this one. static const bool match_rule_supported(int opcode); + // identify extra cases that we might want to provide match rules for + // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen + static const bool match_rule_supported_vector(int opcode, int vlen); + // Some uarchs have different sized float register resources static const int float_pressure(int default_pressure_threshold); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/memnode.cpp --- a/hotspot/src/share/vm/opto/memnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/memnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -72,8 +72,15 @@ dump_adr_type(this, _adr_type, st); Compile* C = Compile::current(); - if( C->alias_type(_adr_type)->is_volatile() ) + if (C->alias_type(_adr_type)->is_volatile()) { st->print(" Volatile!"); + } + if (_unaligned_access) { + st->print(" unaligned"); + } + if (_mismatched_access) { + st->print(" mismatched"); + } } void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) { @@ -754,7 +761,8 @@ //----------------------------LoadNode::make----------------------------------- // Polymorphic factory method: -Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, ControlDependency control_dependency) { +Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, + ControlDependency control_dependency, bool unaligned, bool mismatched) { Compile* C = gvn.C; // sanity check the alias category against the created node type @@ -769,40 +777,68 @@ // oop will be recorded in oop map if load crosses safepoint rt->isa_oopptr() || is_immutable_value(adr), "raw memory operations should have control edge"); + LoadNode* load = NULL; switch (bt) { - case T_BOOLEAN: return new LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); - case T_BYTE: return new LoadBNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); - case T_INT: return new LoadINode (ctl, mem, adr, adr_type, 
rt->is_int(), mo, control_dependency); - case T_CHAR: return new LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); - case T_SHORT: return new LoadSNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); - case T_LONG: return new LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency); - case T_FLOAT: return new LoadFNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); - case T_DOUBLE: return new LoadDNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); - case T_ADDRESS: return new LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency); + case T_BOOLEAN: load = new LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; + case T_BYTE: load = new LoadBNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; + case T_INT: load = new LoadINode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; + case T_CHAR: load = new LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; + case T_SHORT: load = new LoadSNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; + case T_LONG: load = new LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency); break; + case T_FLOAT: load = new LoadFNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); break; + case T_DOUBLE: load = new LoadDNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); break; + case T_ADDRESS: load = new LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency); break; case T_OBJECT: #ifdef _LP64 if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node* load = gvn.transform(new LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency)); - return new DecodeNNode(load, load->bottom_type()->make_ptr()); + load = new LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency); } else #endif { 
assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop"); - return new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency); + load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency); } + break; + } + assert(load != NULL, "LoadNode should have been created"); + if (unaligned) { + load->set_unaligned_access(); } - ShouldNotReachHere(); - return (LoadNode*)NULL; + if (mismatched) { + load->set_mismatched_access(); + } + if (load->Opcode() == Op_LoadN) { + Node* ld = gvn.transform(load); + return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); + } + + return load; } -LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) { +LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, + ControlDependency control_dependency, bool unaligned, bool mismatched) { bool require_atomic = true; - return new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); + LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); + if (unaligned) { + load->set_unaligned_access(); + } + if (mismatched) { + load->set_mismatched_access(); + } + return load; } -LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) { +LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, + ControlDependency control_dependency, bool unaligned, bool mismatched) { bool require_atomic = true; - return new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); + LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, 
control_dependency, require_atomic); + if (unaligned) { + load->set_unaligned_access(); + } + if (mismatched) { + load->set_mismatched_access(); + } + return load; } @@ -2393,7 +2429,8 @@ st->Opcode() == Op_StoreVector || Opcode() == Op_StoreVector || phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw || - (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode + (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode + (is_mismatched_access() || st->as_Store()->is_mismatched_access()), "no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]); if (st->in(MemNode::Address)->eqv_uncast(address) && @@ -3213,6 +3250,9 @@ // within the initialized memory. intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) { const int FAIL = 0; + if (st->is_unaligned_access()) { + return FAIL; + } if (st->req() != MemNode::ValueIn + 1) return FAIL; // an inscrutable StoreNode (card mark?) Node* ctl = st->in(MemNode::Control); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/memnode.hpp --- a/hotspot/src/share/vm/opto/memnode.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/memnode.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -39,11 +39,14 @@ //------------------------------MemNode---------------------------------------- // Load or Store, possibly throwing a NULL pointer exception class MemNode : public Node { +private: + bool _unaligned_access; // Unaligned access from unsafe + bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance protected: #ifdef ASSERT const TypePtr* _adr_type; // What kind of memory is being addressed? #endif - virtual uint size_of() const; // Size is bigger (ASSERT only) + virtual uint size_of() const; public: enum { Control, // When is it safe to do this load? 
Memory, // Chunk of memory is being loaded from @@ -57,17 +60,17 @@ } MemOrd; protected: MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) - : Node(c0,c1,c2 ) { + : Node(c0,c1,c2 ), _unaligned_access(false), _mismatched_access(false) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) - : Node(c0,c1,c2,c3) { + : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) - : Node(c0,c1,c2,c3,c4) { + : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } @@ -127,6 +130,11 @@ // the given memory state? (The state may or may not be in(Memory).) Node* can_see_stored_value(Node* st, PhaseTransform* phase) const; + void set_unaligned_access() { _unaligned_access = true; } + bool is_unaligned_access() const { return _unaligned_access; } + void set_mismatched_access() { _mismatched_access = true; } + bool is_mismatched_access() const { return _mismatched_access; } + #ifndef PRODUCT static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st); virtual void dump_spec(outputStream *st) const; @@ -190,9 +198,10 @@ } // Polymorphic factory method: - static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, - const TypePtr* at, const Type *rt, BasicType bt, - MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest); + static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, + const TypePtr* at, const Type *rt, BasicType bt, + MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, + bool unaligned = false, bool mismatched = false); virtual uint hash() const; // Check the type @@ -367,7 +376,8 @@ virtual BasicType memory_type() const { return T_LONG; } bool require_atomic_access() 
const { return _require_atomic_access; } static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest); + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, + bool unaligned = false, bool mismatched = false); #ifndef PRODUCT virtual void dump_spec(outputStream *st) const { LoadNode::dump_spec(st); @@ -418,7 +428,8 @@ virtual BasicType memory_type() const { return T_DOUBLE; } bool require_atomic_access() const { return _require_atomic_access; } static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest); + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, + bool unaligned = false, bool mismatched = false); #ifndef PRODUCT virtual void dump_spec(outputStream *st) const { LoadNode::dump_spec(st); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/movenode.cpp --- a/hotspot/src/share/vm/opto/movenode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/movenode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -230,9 +230,7 @@ // Convert to a bool (flipped) // Build int->bool conversion -#ifndef PRODUCT - if( PrintOpto ) tty->print_cr("CMOV to I2B"); -#endif + if (PrintOpto) { tty->print_cr("CMOV to I2B"); } Node *n = new Conv2BNode( cmp->in(1) ); if( flip ) n = new XorINode( phase->transform(n), phase->intcon(1) ); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/multnode.cpp --- a/hotspot/src/share/vm/opto/multnode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/multnode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -44,14 +44,14 @@ //------------------------------proj_out--------------------------------------- // Get a named projection ProjNode* MultiNode::proj_out(uint which_proj) const { - assert(Opcode() != Op_If || which_proj == 
(uint)true || which_proj == (uint)false, "must be 1 or 0"); - assert(Opcode() != Op_If || outcnt() == 2, "bad if #1"); + assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0"); + assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || outcnt() == 2, "bad if #1"); for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) { Node *p = fast_out(i); if (p->is_Proj()) { ProjNode *proj = p->as_Proj(); if (proj->_con == which_proj) { - assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2"); + assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || proj->Opcode() == (which_proj ? Op_IfTrue : Op_IfFalse), "bad if #2"); return proj; } } else { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/node.hpp --- a/hotspot/src/share/vm/opto/node.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/node.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -125,6 +125,7 @@ class PhiNode; class Pipeline; class ProjNode; +class RangeCheckNode; class RegMask; class RegionNode; class RootNode; @@ -584,6 +585,7 @@ DEFINE_CLASS_ID(Jump, PCTable, 1) DEFINE_CLASS_ID(If, MultiBranch, 1) DEFINE_CLASS_ID(CountedLoopEnd, If, 0) + DEFINE_CLASS_ID(RangeCheck, If, 1) DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2) DEFINE_CLASS_ID(Start, Multi, 2) DEFINE_CLASS_ID(MemBar, Multi, 3) @@ -758,6 +760,7 @@ DEFINE_CLASS_QUERY(FastLock) DEFINE_CLASS_QUERY(FastUnlock) DEFINE_CLASS_QUERY(If) + DEFINE_CLASS_QUERY(RangeCheck) DEFINE_CLASS_QUERY(IfFalse) DEFINE_CLASS_QUERY(IfTrue) DEFINE_CLASS_QUERY(Initialize) diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/output.cpp --- a/hotspot/src/share/vm/opto/output.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/output.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -91,13 +91,10 @@ } // Break before main entry point - if( (_method && C->directive()->BreakAtExecuteOption) -#ifndef PRODUCT - ||(OptoBreakpoint && 
is_method_compilation()) - ||(OptoBreakpointOSR && is_osr_compilation()) - ||(OptoBreakpointC2R && !_method) -#endif - ) { + if ((_method && C->directive()->BreakAtExecuteOption) || + (OptoBreakpoint && is_method_compilation()) || + (OptoBreakpointOSR && is_osr_compilation()) || + (OptoBreakpointC2R && !_method) ) { // checking for _method means that OptoBreakpoint does not apply to // runtime stubs or frame converters _cfg->insert( entry, 1, new MachBreakpointNode() ); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/parse1.cpp --- a/hotspot/src/share/vm/opto/parse1.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/parse1.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -958,25 +958,22 @@ PPC64_ONLY(wrote_volatile() ||) (AlwaysSafeConstructors && wrote_fields()))) { _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final()); -#ifndef PRODUCT if (PrintOpto && (Verbose || WizardMode)) { method()->print_name(); tty->print_cr(" writes finals and needs a memory barrier"); } -#endif } - // Any method can write a @Stable field; insert memory barriers after - // those also. If there is a predecessor allocation node, bind the - // barrier there. + // Any method can write a @Stable field; insert memory barriers + // after those also. Can't bind predecessor allocation node (if any) + // with barrier because allocation doesn't always dominate + // MemBarRelease. if (wrote_stable()) { - _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final()); -#ifndef PRODUCT + _exits.insert_mem_bar(Op_MemBarRelease); if (PrintOpto && (Verbose || WizardMode)) { method()->print_name(); tty->print_cr(" writes @Stable and needs a memory barrier"); } -#endif } for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) { @@ -991,13 +988,18 @@ // In case of concurrent class loading, the type we set for the // ret_phi in build_exits() may have been too optimistic and the // ret_phi may be top now. 
-#ifdef ASSERT + // Otherwise, we've encountered an error and have to mark the method as + // not compilable. Just using an assertion instead would be dangerous + // as this could lead to an infinite compile loop in non-debug builds. { MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag); - assert(ret_type->isa_ptr() && C->env()->system_dictionary_modification_counter_changed(), "return value must be well defined"); + if (C->env()->system_dictionary_modification_counter_changed()) { + C->record_failure(C2Compiler::retry_class_loading_during_parsing()); + } else { + C->record_method_not_compilable("Can't determine return type."); + } } -#endif - C->record_failure(C2Compiler::retry_class_loading_during_parsing()); + return; } _exits.push_node(ret_type->basic_type(), ret_phi); } @@ -2147,15 +2149,24 @@ // here. Node* phi = _exits.argument(0); const TypeInstPtr *tr = phi->bottom_type()->isa_instptr(); - if( tr && tr->klass()->is_loaded() && - tr->klass()->is_interface() ) { + if (tr && tr->klass()->is_loaded() && + tr->klass()->is_interface()) { const TypeInstPtr *tp = value->bottom_type()->isa_instptr(); if (tp && tp->klass()->is_loaded() && !tp->klass()->is_interface()) { // sharpen the type eagerly; this eases certain assert checking if (tp->higher_equal(TypeInstPtr::NOTNULL)) tr = tr->join_speculative(TypeInstPtr::NOTNULL)->is_instptr(); - value = _gvn.transform(new CheckCastPPNode(0,value,tr)); + value = _gvn.transform(new CheckCastPPNode(0, value, tr)); + } + } else { + // Also handle returns of oop-arrays to an arrays-of-interface return + const TypeInstPtr* phi_tip; + const TypeInstPtr* val_tip; + Type::get_arrays_base_elements(phi->bottom_type(), value->bottom_type(), &phi_tip, &val_tip); + if (phi_tip != NULL && phi_tip->is_loaded() && phi_tip->klass()->is_interface() && + val_tip != NULL && val_tip->is_loaded() && !val_tip->klass()->is_interface()) { + value = _gvn.transform(new CheckCastPPNode(0, value, phi->bottom_type())); } } 
phi->add_req(value); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/parse2.cpp --- a/hotspot/src/share/vm/opto/parse2.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/parse2.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -136,8 +136,16 @@ BoolTest::mask btest = BoolTest::lt; tst = _gvn.transform( new BoolNode(chk, btest) ); } + RangeCheckNode* rc = new RangeCheckNode(control(), tst, PROB_MAX, COUNT_UNKNOWN); + _gvn.set_type(rc, rc->Value(&_gvn)); + if (!tst->is_Con()) { + record_for_igvn(rc); + } + set_control(_gvn.transform(new IfTrueNode(rc))); // Branch to failure if out of bounds - { BuildCutout unless(this, tst, PROB_MAX); + { + PreserveJVMState pjvms(this); + set_control(_gvn.transform(new IfFalseNode(rc))); if (C->allow_range_check_smearing()) { // Do not use builtin_throw, since range checks are sometimes // made more stringent by an optimistic transformation. @@ -940,13 +948,11 @@ //-------------------------------repush_if_args-------------------------------- // Push arguments of an "if" bytecode back onto the stack by adjusting _sp. inline int Parse::repush_if_args() { -#ifndef PRODUCT if (PrintOpto && WizardMode) { tty->print("defending against excessive implicit null exceptions on %s @%d in ", Bytecodes::name(iter().cur_bc()), iter().cur_bci()); method()->print_name(); tty->cr(); } -#endif int bc_depth = - Bytecodes::depth(iter().cur_bc()); assert(bc_depth == 1 || bc_depth == 2, "only two kinds of branches"); DEBUG_ONLY(sync_jvms()); // argument(n) requires a synced jvms @@ -967,10 +973,9 @@ float prob = branch_prediction(cnt, btest, target_bci, c); if (prob == PROB_UNKNOWN) { // (An earlier version of do_ifnull omitted this trap for OSR methods.) 
-#ifndef PRODUCT - if (PrintOpto && Verbose) - tty->print_cr("Never-taken edge stops compilation at bci %d",bci()); -#endif + if (PrintOpto && Verbose) { + tty->print_cr("Never-taken edge stops compilation at bci %d", bci()); + } repush_if_args(); // to gather stats on loop // We need to mark this branch as taken so that if we recompile we will // see that it is possible. In the tiered system the interpreter doesn't @@ -1049,10 +1054,9 @@ float untaken_prob = 1.0 - prob; if (prob == PROB_UNKNOWN) { -#ifndef PRODUCT - if (PrintOpto && Verbose) - tty->print_cr("Never-taken edge stops compilation at bci %d",bci()); -#endif + if (PrintOpto && Verbose) { + tty->print_cr("Never-taken edge stops compilation at bci %d", bci()); + } repush_if_args(); // to gather stats on loop // We need to mark this branch as taken so that if we recompile we will // see that it is possible. In the tiered system the interpreter doesn't diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/parse3.cpp --- a/hotspot/src/share/vm/opto/parse3.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/parse3.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -213,11 +213,9 @@ // not need to mention the class index, since the class will // already have been loaded if we ever see a non-null value.) // uncommon_trap(iter().get_field_signature_index()); -#ifndef PRODUCT if (PrintOpto && (Verbose || WizardMode)) { method()->print_name(); tty->print_cr(" asserting nullness of field at bci: %d", bci()); } -#endif if (C->log() != NULL) { C->log()->elem("assert_null reason='field' klass='%d'", C->log()->identify(field->type())); @@ -313,9 +311,8 @@ // Preserve allocation ptr to create precedent edge to it in membar // generated on exit from constructor. - if (C->eliminate_boxing() && - adr_type->isa_oopptr() && adr_type->is_oopptr()->is_ptr_to_boxed_value() && - AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) { + // Can't bind stable with its allocation, only record allocation for final field. 
+ if (field->is_final() && AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) { set_alloc_with_final(obj); } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/phaseX.hpp --- a/hotspot/src/share/vm/opto/phaseX.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/phaseX.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -521,7 +521,8 @@ Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); // Create a new if below new_entry for the predicate to be cloned ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, - Deoptimization::DeoptReason reason); + Deoptimization::DeoptReason reason, + int opcode); void remove_speculative_types(); void check_no_speculative_types() { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/reg_split.cpp --- a/hotspot/src/share/vm/opto/reg_split.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/reg_split.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -55,13 +55,15 @@ // Get a SpillCopy node with wide-enough masks. Use the 'wide-mask', the // wide ideal-register spill-mask if possible. If the 'wide-mask' does // not cover the input (or output), use the input (or output) mask instead. -Node *PhaseChaitin::get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx ) { +Node *PhaseChaitin::get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx) { // If ideal reg doesn't exist we've got a bad schedule happening // that is forcing us to spill something that isn't spillable. 
// Bail rather than abort int ireg = def->ideal_reg(); - if( ireg == 0 || ireg == Op_RegFlags ) { - assert(false, "attempted to spill a non-spillable item"); + if (ireg == 0 || ireg == Op_RegFlags) { + assert(false, "attempted to spill a non-spillable item: %d: %s <- %d: %s, ireg = %d, spill_type: %s", + def->_idx, def->Name(), use->_idx, use->Name(), ireg, + MachSpillCopyNode::spill_type(spill_type)); C->record_method_not_compilable("attempted to spill a non-spillable item"); return NULL; } @@ -308,14 +310,16 @@ //------------------------------split_Rematerialize---------------------------- // Clone a local copy of the def. -Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) { +Node *PhaseChaitin::split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, + GrowableArray splits, int slidx, uint *lrg2reach, + Node **Reachblock, bool walkThru) { // The input live ranges will be stretched to the site of the new // instruction. They might be stretched past a def and will thus // have the old and new values of the same live range alive at the // same time - a definite no-no. Split out private copies of // the inputs. - if( def->req() > 1 ) { - for( uint i = 1; i < def->req(); i++ ) { + if (def->req() > 1) { + for (uint i = 1; i < def->req(); i++) { Node *in = def->in(i); uint lidx = _lrg_map.live_range_id(in); // We do not need this for live ranges that are only defined once. @@ -327,12 +331,29 @@ Block *b_def = _cfg.get_block_for_node(def); int idx_def = b_def->find_node(def); - Node *in_spill = get_spillcopy_wide(MachSpillCopyNode::InputToRematerialization, in, def, i ); - if( !in_spill ) return 0; // Bailed out - insert_proj(b_def,idx_def,in_spill,maxlrg++); - if( b_def == b ) - insidx++; - def->set_req(i,in_spill); + // Cannot spill Op_RegFlags. 
+ Node *in_spill; + if (in->ideal_reg() != Op_RegFlags) { + in_spill = get_spillcopy_wide(MachSpillCopyNode::InputToRematerialization, in, def, i); + if (!in_spill) { return 0; } // Bailed out + insert_proj(b_def, idx_def, in_spill, maxlrg++); + if (b_def == b) { + insidx++; + } + def->set_req(i, in_spill); + } else { + // The 'in' defines a flag register. Flag registers can not be spilled. + // Register allocation handles live ranges with flag registers + // by rematerializing the def (in this case 'in'). Thus, this is not + // critical if the input can be rematerialized, too. + if (!in->rematerialize()) { + assert(false, "Can not rematerialize %d: %s. Prolongs RegFlags live" + " range and defining node %d: %s may not be rematerialized.", + def->_idx, def->Name(), in->_idx, in->Name()); + C->record_method_not_compilable("attempted to spill a non-spillable item with RegFlags input"); + return 0; // Bailed out + } + } } } @@ -506,10 +527,9 @@ // Initialize the split counts to zero splits.append(0); #endif -#ifndef PRODUCT - if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 ) + if (PrintOpto && WizardMode && lrgs(bidx)._was_spilled1) { tty->print_cr("Warning, 2nd spill of L%d",bidx); -#endif + } } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/split_if.cpp --- a/hotspot/src/share/vm/opto/split_if.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/split_if.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -390,13 +390,13 @@ // Found an If getting its condition-code input from a Phi in the same block. // Split thru the Region. 
void PhaseIdealLoop::do_split_if( Node *iff ) { -#ifndef PRODUCT - if( PrintOpto && VerifyLoopOptimizations ) + if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("Split-if"); + } if (TraceLoopOpts) { tty->print_cr("SplitIf"); } -#endif + C->set_major_progress(); Node *region = iff->in(0); Node *region_dom = idom(region); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/stringopts.cpp --- a/hotspot/src/share/vm/opto/stringopts.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/stringopts.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -1293,7 +1293,7 @@ Node* index = __ SubI(charPos, __ intcon((bt == T_BYTE) ? 1 : 2)); Node* ch = __ AddI(r, __ intcon('0')); Node* st = __ store_to_memory(kit.control(), kit.array_element_address(dst_array, index, T_BYTE), - ch, bt, byte_adr_idx, MemNode::unordered); + ch, bt, byte_adr_idx, MemNode::unordered, (bt != T_BYTE) /* mismatched */); iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne), PROB_FAIR, COUNT_UNKNOWN); @@ -1331,7 +1331,7 @@ } else { Node* index = __ SubI(charPos, __ intcon((bt == T_BYTE) ? 1 : 2)); st = __ store_to_memory(kit.control(), kit.array_element_address(dst_array, index, T_BYTE), - sign, bt, byte_adr_idx, MemNode::unordered); + sign, bt, byte_adr_idx, MemNode::unordered, (bt != T_BYTE) /* mismatched */); final_merge->init_req(merge_index + 1, kit.control()); final_mem->init_req(merge_index + 1, st); @@ -1524,7 +1524,7 @@ } else { val = readChar(src_array, i++); } - __ store(__ ctrl(), adr, __ ConI(val), T_CHAR, byte_adr_idx, MemNode::unordered); + __ store(__ ctrl(), adr, __ ConI(val), T_CHAR, byte_adr_idx, MemNode::unordered, true /* mismatched */); index = __ AddI(index, __ ConI(2)); } if (src_is_byte) { @@ -1612,7 +1612,7 @@ } if (!dcon || !dbyte) { // Destination is UTF16. Store a char. 
- __ store(__ ctrl(), adr, val, T_CHAR, byte_adr_idx, MemNode::unordered); + __ store(__ ctrl(), adr, val, T_CHAR, byte_adr_idx, MemNode::unordered, true /* mismatched */); __ set(end, __ AddI(start, __ ConI(2))); } if (!dcon) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/superword.cpp --- a/hotspot/src/share/vm/opto/superword.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/superword.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -276,7 +276,9 @@ // stop looking, we already have the max vector to map to. if (cur_max_vector < local_loop_unroll_factor) { is_slp = false; - NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis fails: unroll limit greater than max vector\n")); + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); + } break; } @@ -389,11 +391,9 @@ if (_do_vector_loop) { if (_packset.length() == 0) { -#ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway"); } -#endif pack_parallel(); } } @@ -558,9 +558,11 @@ assert(!same_velt_type(s, mem_ref), "sanity"); memops.push(s); } - MemNode* best_align_to_mem_ref = find_align_to_ref(memops); + best_align_to_mem_ref = find_align_to_ref(memops); if (best_align_to_mem_ref == NULL) { - NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");) + if (TraceSuperWord) { + tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL"); + } break; } best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); @@ -582,12 +584,10 @@ } // while (memops.size() != 0 set_align_to_ref(best_align_to_mem_ref); -#ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nAfter find_adjacent_refs"); print_packset(); } -#endif } #ifndef PRODUCT @@ -874,7 +874,7 @@ _dg.make_edge(s1, slice_sink); } } -#ifndef PRODUCT + if (TraceSuperWord) { tty->print_cr("\nDependence 
graph for slice: %d", n->_idx); for (int q = 0; q < _nlist.length(); q++) { @@ -882,11 +882,10 @@ } tty->cr(); } -#endif + _nlist.clear(); } -#ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); for (int r = 0; r < _disjoint_ptrs.length(); r++) { @@ -895,7 +894,7 @@ } tty->cr(); } -#endif + } //---------------------------mem_slice_preds--------------------------- @@ -912,7 +911,9 @@ if (out->is_Load()) { if (in_bb(out)) { preds.push(out); - NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);) + if (TraceSuperWord && Verbose) { + tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx); + } } } else { // FIXME @@ -931,7 +932,9 @@ }//for if (n == stop) break; preds.push(n); - NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);) + if (TraceSuperWord && Verbose) { + tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx); + } prev = n; assert(n->is_Mem(), "unexpected node %s", n->Name()); n = n->in(MemNode::Memory); @@ -1123,12 +1126,10 @@ } } -#ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nAfter extend_packlist"); print_packset(); } -#endif } //------------------------------follow_use_defs--------------------------- @@ -1412,12 +1413,10 @@ } } -#ifndef PRODUCT if (TraceSuperWord) { tty->print_cr("\nAfter combine_packs"); print_packset(); } -#endif } //-----------------------------construct_my_pack_map-------------------------- @@ -2244,10 +2243,15 @@ if (cl->has_passed_slp()) { uint slp_max_unroll_factor = cl->slp_max_unroll(); if (slp_max_unroll_factor == max_vlen) { - NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte); + } 
// For atomic unrolled loops which are vector mapped, instigate more unrolling. cl->set_notpassed_slp(); - C->set_major_progress(); + // if vector resources are limited, do not allow additional unrolling + if (FLOATPRESSURE > 8) { + C->set_major_progress(); + } cl->mark_do_unroll_only(); } } @@ -2650,10 +2654,10 @@ } ct++; } while (again); -#ifndef PRODUCT - if (TraceSuperWord && Verbose) + + if (TraceSuperWord && Verbose) { tty->print_cr("compute_max_depth iterated: %d times", ct); -#endif + } } //-------------------------compute_vector_element_type----------------------- @@ -2664,10 +2668,9 @@ // Normally the type of the add is integer, but for packed character // operations the type of the add needs to be char. void SuperWord::compute_vector_element_type() { -#ifndef PRODUCT - if (TraceSuperWord && Verbose) + if (TraceSuperWord && Verbose) { tty->print_cr("\ncompute_velt_type:"); -#endif + } // Initial type for (int i = 0; i < _block.length(); i++) { @@ -2758,7 +2761,9 @@ offset += iv_adjust*p.memory_size(); int off_rem = offset % vw; int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; - NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);) + if (TraceSuperWord && Verbose) { + tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod); + } return off_mod; } @@ -4046,11 +4051,9 @@ }//for (int i... 
if (_ii_first == -1 || _ii_last == -1) { -#ifndef PRODUCT if (TraceSuperWord && Verbose) { tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong"); } -#endif return -1; // something vent wrong } // collect nodes in the first and last generations @@ -4083,11 +4086,9 @@ }//for if (found == false) { -#ifndef PRODUCT if (TraceSuperWord && Verbose) { tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx); } -#endif _ii_order.clear(); return -1; } @@ -4153,11 +4154,10 @@ return true; } -#ifndef PRODUCT if (TraceSuperWord && Verbose) { tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx); } -#endif + return false; } @@ -4224,11 +4224,9 @@ for (int i = 0; i < _mem_slice_head.length(); i++) { Node* n = _mem_slice_head.at(i); if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) { -#ifndef PRODUCT if (TraceSuperWord && Verbose) { tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx); } -#endif continue; } @@ -4275,11 +4273,10 @@ restart(); // invalidate all basic structures, since we rebuilt the graph -#ifndef PRODUCT if (TraceSuperWord && Verbose) { tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild"); } -#endif + return true; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/type.cpp --- a/hotspot/src/share/vm/opto/type.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/type.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -150,6 +150,33 @@ return bt; } +// For two instance arrays of same dimension, return the base element types. +// Otherwise or if the arrays have different dimensions, return NULL. +void Type::get_arrays_base_elements(const Type *a1, const Type *a2, + const TypeInstPtr **e1, const TypeInstPtr **e2) { + + if (e1) *e1 = NULL; + if (e2) *e2 = NULL; + const TypeAryPtr* a1tap = (a1 == NULL) ? 
NULL : a1->isa_aryptr(); + const TypeAryPtr* a2tap = (a2 == NULL) ? NULL : a2->isa_aryptr(); + + if (a1tap != NULL && a2tap != NULL) { + // Handle multidimensional arrays + const TypePtr* a1tp = a1tap->elem()->make_ptr(); + const TypePtr* a2tp = a2tap->elem()->make_ptr(); + while (a1tp && a1tp->isa_aryptr() && a2tp && a2tp->isa_aryptr()) { + a1tap = a1tp->is_aryptr(); + a2tap = a2tp->is_aryptr(); + a1tp = a1tap->elem()->make_ptr(); + a2tp = a2tap->elem()->make_ptr(); + } + if (a1tp && a1tp->isa_instptr() && a2tp && a2tp->isa_instptr()) { + if (e1) *e1 = a1tp->is_instptr(); + if (e2) *e2 = a2tp->is_instptr(); + } + } +} + //---------------------------get_typeflow_type--------------------------------- // Import a type produced by ciTypeFlow. const Type* Type::get_typeflow_type(ciType* type) { @@ -2029,7 +2056,11 @@ bool TypeAry::interface_vs_oop(const Type *t) const { const TypeAry* t_ary = t->is_ary(); if (t_ary) { - return _elem->interface_vs_oop(t_ary->_elem); + const TypePtr* this_ptr = _elem->make_ptr(); // In case we have narrow_oops + const TypePtr* t_ptr = t_ary->_elem->make_ptr(); + if(this_ptr != NULL && t_ptr != NULL) { + return this_ptr->interface_vs_oop(t_ptr); + } } return false; } @@ -3134,8 +3165,17 @@ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows // into a Phi which "knows" it's an Interface type we'll have to // uplift the type. - if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) - return kills; // Uplift to interface + if (!empty()) { + if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) { + return kills; // Uplift to interface + } + // Also check for evil cases of 'this' being a class array + // and 'kills' expecting an array of interfaces. 
+ Type::get_arrays_base_elements(ft, kills, NULL, &ktip); + if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) { + return kills; // Uplift to array of interface + } + } return Type::TOP; // Canonical empty value } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/type.hpp --- a/hotspot/src/share/vm/opto/type.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/type.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -368,6 +368,11 @@ return _const_basic_type[type]; } + // For two instance arrays of same dimension, return the base element types. + // Otherwise or if the arrays have different dimensions, return NULL. + static void get_arrays_base_elements(const Type *a1, const Type *a2, + const TypeInstPtr **e1, const TypeInstPtr **e2); + // Mapping to the array element's basic type. BasicType array_element_basic_type() const; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/opto/vectornode.cpp --- a/hotspot/src/share/vm/opto/vectornode.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/opto/vectornode.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -188,7 +188,7 @@ (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen)) { int vopc = VectorNode::opcode(opc, bt); - return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4); + return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); } return false; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/prims/jni.cpp --- a/hotspot/src/share/vm/prims/jni.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/prims/jni.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -3878,7 +3878,7 @@ unit_test_function_call // Forward declaration -void TestNmethodBucket_test(); +void TestDependencyContext_test(); void test_semaphore(); void TestOS_test(); void TestReservedSpace_test(); @@ -3902,7 +3902,7 @@ void TestBufferingOopClosure_test(); void TestCodeCacheRemSet_test(); void FreeRegionList_test(); -void 
test_memset_with_concurrent_readers(); +void test_memset_with_concurrent_readers() NOT_DEBUG_RETURN; void TestPredictions_test(); void WorkerDataArray_test(); #endif @@ -3910,7 +3910,7 @@ void execute_internal_vm_tests() { if (ExecuteInternalVMTests) { tty->print_cr("Running internal VM tests"); - run_unit_test(TestNmethodBucket_test()); + run_unit_test(TestDependencyContext_test()); run_unit_test(test_semaphore()); run_unit_test(TestOS_test()); run_unit_test(TestReservedSpace_test()); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/prims/methodHandles.cpp --- a/hotspot/src/share/vm/prims/methodHandles.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/prims/methodHandles.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -27,6 +27,7 @@ #include "classfile/stringTable.hpp" #include "code/codeCache.hpp" #include "code/codeCacheExtensions.hpp" +#include "code/dependencyContext.hpp" #include "compiler/compileBroker.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/oopMapCache.hpp" @@ -945,30 +946,33 @@ return rfill + overflow; } +// Is it safe to remove stale entries from a dependency list? +static bool safe_to_expunge() { + // Since parallel GC threads can concurrently iterate over a dependency + // list during safepoint, it is safe to remove entries only when + // CodeCache lock is held. 
+ return CodeCache_lock->owned_by_self(); +} + void MethodHandles::add_dependent_nmethod(oop call_site, nmethod* nm) { assert_locked_or_safepoint(CodeCache_lock); oop context = java_lang_invoke_CallSite::context(call_site); - nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); - - nmethodBucket* new_deps = nmethodBucket::add_dependent_nmethod(deps, nm); - if (deps != new_deps) { - java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps); - } + DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); + // Try to purge stale entries on updates. + // Since GC doesn't clean dependency contexts rooted at CallSiteContext objects, + // in order to avoid memory leak, stale entries are purged whenever a dependency list + // is changed (both on addition and removal). Though memory reclamation is delayed, + // it avoids indefinite memory usage growth. + deps.add_dependent_nmethod(nm, /*expunge_stale_entries=*/safe_to_expunge()); } void MethodHandles::remove_dependent_nmethod(oop call_site, nmethod* nm) { assert_locked_or_safepoint(CodeCache_lock); oop context = java_lang_invoke_CallSite::context(call_site); - nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); - - if (nmethodBucket::remove_dependent_nmethod(deps, nm)) { - nmethodBucket* new_deps = nmethodBucket::clean_dependent_nmethods(deps); - if (deps != new_deps) { - java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps); - } - } + DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); + deps.remove_dependent_nmethod(nm, /*expunge_stale_entries=*/safe_to_expunge()); } void MethodHandles::flush_dependent_nmethods(Handle call_site, Handle target) { @@ -977,21 +981,15 @@ int marked = 0; CallSiteDepChange changes(call_site(), target()); { + No_Safepoint_Verifier 
nsv; MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag); oop context = java_lang_invoke_CallSite::context(call_site()); - nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); - - marked = nmethodBucket::mark_dependent_nmethods(deps, changes); - if (marked > 0) { - nmethodBucket* new_deps = nmethodBucket::clean_dependent_nmethods(deps); - if (deps != new_deps) { - java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps); - } - } + DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context); + marked = deps.mark_dependent_nmethods(changes); } if (marked > 0) { - // At least one nmethod has been marked for deoptimization + // At least one nmethod has been marked for deoptimization. VM_Deoptimize op; VMThread::execute(&op); } @@ -1331,6 +1329,8 @@ } JVM_END +// It is called by a Cleaner object which ensures that dropped CallSites properly +// deallocate their dependency information. 
JVM_ENTRY(void, MHN_clearCallSiteContext(JNIEnv* env, jobject igcls, jobject context_jh)) { Handle context(THREAD, JNIHandles::resolve_non_null(context_jh)); { @@ -1339,19 +1339,11 @@ int marked = 0; { + No_Safepoint_Verifier nsv; MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag); - nmethodBucket* b = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context()); - while(b != NULL) { - nmethod* nm = b->get_nmethod(); - if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization()) { - nm->mark_for_deoptimization(); - marked++; - } - nmethodBucket* next = b->next(); - delete b; - b = next; - } - java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context(), NULL); // reset context + assert(safe_to_expunge(), "removal is not safe"); + DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context()); + marked = deps.remove_all_dependents(); } if (marked > 0) { // At least one nmethod has been marked for deoptimization diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/arguments.cpp --- a/hotspot/src/share/vm/runtime/arguments.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/arguments.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -459,7 +459,7 @@ return flag_name; } -#ifndef PRODUCT +#ifdef ASSERT static bool lookup_special_flag(const char *flag_name, size_t skip_index) { for (size_t i = 0; special_jvm_flags[i].name != NULL; i++) { if ((i != skip_index) && (strcmp(special_jvm_flags[i].name, flag_name) == 0)) { @@ -1468,24 +1468,6 @@ // Enable SegmentedCodeCache if TieredCompilation is enabled and ReservedCodeCacheSize >= 240M if (FLAG_IS_DEFAULT(SegmentedCodeCache) && ReservedCodeCacheSize >= 240*M) { FLAG_SET_ERGO(bool, SegmentedCodeCache, true); - - if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) { - // Multiply sizes by 5 but fix NonNMethodCodeHeapSize (distribute among non-profiled and profiled code heap) - if 
(FLAG_IS_DEFAULT(ProfiledCodeHeapSize)) { - FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, ProfiledCodeHeapSize * 5 + NonNMethodCodeHeapSize * 2); - } - if (FLAG_IS_DEFAULT(NonProfiledCodeHeapSize)) { - FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, NonProfiledCodeHeapSize * 5 + NonNMethodCodeHeapSize * 2); - } - // Check consistency of code heap sizes - if ((NonNMethodCodeHeapSize + NonProfiledCodeHeapSize + ProfiledCodeHeapSize) != ReservedCodeCacheSize) { - jio_fprintf(defaultStream::error_stream(), - "Invalid code heap sizes: NonNMethodCodeHeapSize(%dK) + ProfiledCodeHeapSize(%dK) + NonProfiledCodeHeapSize(%dK) = %dK. Must be equal to ReservedCodeCacheSize = %uK.\n", - NonNMethodCodeHeapSize/K, ProfiledCodeHeapSize/K, NonProfiledCodeHeapSize/K, - (NonNMethodCodeHeapSize + ProfiledCodeHeapSize + NonProfiledCodeHeapSize)/K, ReservedCodeCacheSize/K); - vm_exit(1); - } - } } if (!UseInterpreter) { // -Xcomp Tier3InvokeNotifyFreqLog = 0; @@ -2535,18 +2517,11 @@ "Invalid ReservedCodeCacheSize=%dM. Must be at most %uM.\n", ReservedCodeCacheSize/M, CODE_CACHE_SIZE_LIMIT/M); status = false; - } else if (NonNMethodCodeHeapSize < min_code_cache_size){ + } else if (NonNMethodCodeHeapSize < min_code_cache_size) { jio_fprintf(defaultStream::error_stream(), "Invalid NonNMethodCodeHeapSize=%dK. Must be at least %uK.\n", NonNMethodCodeHeapSize/K, min_code_cache_size/K); status = false; - } else if ((!FLAG_IS_DEFAULT(NonNMethodCodeHeapSize) || !FLAG_IS_DEFAULT(ProfiledCodeHeapSize) || !FLAG_IS_DEFAULT(NonProfiledCodeHeapSize)) - && (NonNMethodCodeHeapSize + NonProfiledCodeHeapSize + ProfiledCodeHeapSize) != ReservedCodeCacheSize) { - jio_fprintf(defaultStream::error_stream(), - "Invalid code heap sizes: NonNMethodCodeHeapSize(%dK) + ProfiledCodeHeapSize(%dK) + NonProfiledCodeHeapSize(%dK) = %dK. 
Must be equal to ReservedCodeCacheSize = %uK.\n", - NonNMethodCodeHeapSize/K, ProfiledCodeHeapSize/K, NonProfiledCodeHeapSize/K, - (NonNMethodCodeHeapSize + ProfiledCodeHeapSize + NonProfiledCodeHeapSize)/K, ReservedCodeCacheSize/K); - status = false; } if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/deoptimization.cpp --- a/hotspot/src/share/vm/runtime/deoptimization.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -68,7 +68,8 @@ int number_of_frames, intptr_t* frame_sizes, address* frame_pcs, - BasicType return_type) { + BasicType return_type, + int exec_mode) { _size_of_deoptimized_frame = size_of_deoptimized_frame; _caller_adjustment = caller_adjustment; _caller_actual_parameters = caller_actual_parameters; @@ -80,10 +81,11 @@ _initial_info = 0; // PD (x86 only) _counter_temp = 0; - _unpack_kind = 0; + _unpack_kind = exec_mode; _sender_sp_temp = 0; _total_frame_sizes = size_of_frames(); + assert(exec_mode >= 0 && exec_mode < Unpack_LIMIT, "Unexpected exec_mode"); } @@ -128,7 +130,7 @@ // ResetNoHandleMark and HandleMark were removed from it. The actual reallocation // of previously eliminated objects occurs in realloc_objects, which is // called from the method fetch_unroll_info_helper below. -JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread)) +JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread, int exec_mode)) // It is actually ok to allocate handles in a leaf method. It causes no safepoints, // but makes the entry a little slower. 
There is however a little dance we have to // do in debug mode to get around the NoHandleMark code in the JRT_LEAF macro @@ -142,12 +144,12 @@ } thread->inc_in_deopt_handler(); - return fetch_unroll_info_helper(thread); + return fetch_unroll_info_helper(thread, exec_mode); JRT_END // This is factored, since it is both called from a JRT_LEAF (deoptimization) and a JRT_ENTRY (uncommon_trap) -Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread) { +Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread, int exec_mode) { // Note: there is a safepoint safety issue here. No matter whether we enter // via vanilla deopt or uncommon trap we MUST NOT stop at a safepoint once @@ -186,6 +188,19 @@ assert(vf->is_compiled_frame(), "Wrong frame type"); chunk->push(compiledVFrame::cast(vf)); + ScopeDesc* trap_scope = chunk->at(0)->scope(); + Handle exceptionObject; + if (trap_scope->rethrow_exception()) { + if (PrintDeoptimizationDetails) { + tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_scope->method()->method_holder()->name()->as_C_string(), trap_scope->method()->name()->as_C_string(), trap_scope->bci()); + } + GrowableArray<ScopeValue*>* expressions = trap_scope->expressions(); + guarantee(expressions != NULL && expressions->length() > 0, "must have exception to throw"); + ScopeValue* topOfStack = expressions->top(); + exceptionObject = StackValue::create_stack_value(&deoptee, &map, topOfStack)->get_obj(); + assert(exceptionObject() != NULL, "exception oop can not be null"); + } + bool realloc_failures = false; #if defined(COMPILER2) || INCLUDE_JVMCI @@ -474,13 +489,21 @@ assert(CodeCache::find_blob_unsafe(frame_pcs[0]) != NULL, "bad pc"); #endif // SHARK +#ifdef INCLUDE_JVMCI + if (exceptionObject() != NULL) { + thread->set_exception_oop(exceptionObject()); + exec_mode = Unpack_exception; + } +#endif + UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
caller_adjustment * BytesPerWord, caller_was_method_handle ? 0 : callee_parameters, number_of_frames, frame_sizes, frame_pcs, - return_type); + return_type, + exec_mode); // On some platforms, we need a way to pass some platform dependent // information to the unpacking code so the skeletal frames come out // correct (initial fp value, unextended sp, ...) @@ -1495,18 +1518,6 @@ #endif Bytecodes::Code trap_bc = trap_method->java_code_at(trap_bci); - - if (trap_scope->rethrow_exception()) { - if (PrintDeoptimizationDetails) { - tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_method->method_holder()->name()->as_C_string(), trap_method->name()->as_C_string(), trap_bci); - } - GrowableArray<ScopeValue*>* expressions = trap_scope->expressions(); - guarantee(expressions != NULL, "must have exception to throw"); - ScopeValue* topOfStack = expressions->top(); - Handle topOfStackObj = StackValue::create_stack_value(&fr, &reg_map, topOfStack)->get_obj(); - THREAD->set_pending_exception(topOfStackObj(), NULL, 0); - } - // Record this event in the histogram. gather_statistics(reason, action, trap_bc); @@ -1985,7 +1996,7 @@ ignore_maybe_prior_recompile); } -Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request) { +Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request, jint exec_mode) { if (TraceDeoptimization) { tty->print("Uncommon trap "); } @@ -1994,7 +2005,7 @@ // This enters VM and may safepoint uncommon_trap_inner(thread, trap_request); } - return fetch_unroll_info_helper(thread); + return fetch_unroll_info_helper(thread, exec_mode); } // Local derived constants.
diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/deoptimization.hpp --- a/hotspot/src/share/vm/runtime/deoptimization.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/deoptimization.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -123,7 +123,8 @@ Unpack_deopt = 0, // normal deoptimization, use pc computed in unpack_vframe_on_stack Unpack_exception = 1, // exception is pending Unpack_uncommon_trap = 2, // redo last byte code (C2 only) - Unpack_reexecute = 3 // reexecute bytecode (C1 only) + Unpack_reexecute = 3, // reexecute bytecode (C1 only) + Unpack_LIMIT = 4 }; // Checks all compiled methods. Invalid methods are deleted and @@ -179,13 +180,13 @@ intptr_t _initial_info; // Platform dependent data for the sender frame (was FP on x86) int _caller_actual_parameters; // The number of actual arguments at the // interpreted caller of the deoptimized frame + int _unpack_kind; // exec_mode that can be changed during fetch_unroll_info // The following fields are used as temps during the unpacking phase // (which is tight on registers, especially on x86). They really ought // to be PD variables but that involves moving this class into its own // file to use the pd include mechanism. Maybe in a later cleanup ... intptr_t _counter_temp; // SHOULD BE PD VARIABLE (x86 frame count temp) - intptr_t _unpack_kind; // SHOULD BE PD VARIABLE (x86 unpack kind) intptr_t _sender_sp_temp; // SHOULD BE PD VARIABLE (x86 sender_sp) public: // Constructor @@ -195,7 +196,8 @@ int number_of_frames, intptr_t* frame_sizes, address* frames_pcs, - BasicType return_type); + BasicType return_type, + int unpack_kind); ~UnrollBlock(); // Returns where a register is located. 
@@ -205,6 +207,7 @@ intptr_t* frame_sizes() const { return _frame_sizes; } int number_of_frames() const { return _number_of_frames; } address* frame_pcs() const { return _frame_pcs ; } + int unpack_kind() const { return _unpack_kind; } // Returns the total size of frames int size_of_frames() const; @@ -237,7 +240,7 @@ // deoptimized frame. // @argument thread. Thread where stub_frame resides. // @see OptoRuntime::deoptimization_fetch_unroll_info_C - static UnrollBlock* fetch_unroll_info(JavaThread* thread); + static UnrollBlock* fetch_unroll_info(JavaThread* thread, int exec_mode); //** Unpacks vframeArray onto execution stack // Called by assembly stub after execution has returned to @@ -262,7 +265,7 @@ //** Performs an uncommon trap for compiled code. // The top most compiler frame is converted into interpreter frames - static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); + static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode); // Helper routine that enters the VM and may block static void uncommon_trap_inner(JavaThread* thread, jint unloaded_class_index); @@ -423,7 +426,7 @@ static void load_class_by_index(const constantPoolHandle& constant_pool, int index, TRAPS); static void load_class_by_index(const constantPoolHandle& constant_pool, int index); - static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread); + static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread, int exec_mode); static DeoptAction _unloaded_action; // == Action_reinterpret; static const char* _trap_reason_name[]; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/globals.cpp --- a/hotspot/src/share/vm/runtime/globals.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/globals.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -528,59 +528,57 @@ // 4991491 do not "optimize out" the was_set false values: omitting them // tickles a Microsoft compiler bug causing flagTable to be malformed 
-#define NAME(name) NOT_PRODUCT(&name) PRODUCT_ONLY(&CONST_##name) +#define RUNTIME_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) }, +#define RUNTIME_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, +#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) }, +#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) }, +#define RUNTIME_MANAGEABLE_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) }, +#define RUNTIME_PRODUCT_RW_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) }, +#define RUNTIME_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) }, +#define RUNTIME_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, +#define RUNTIME_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) }, -#define RUNTIME_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) }, -#define RUNTIME_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | 
Flag::KIND_PLATFORM_DEPENDENT) }, -#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) }, -#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) }, -#define RUNTIME_MANAGEABLE_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) }, -#define RUNTIME_PRODUCT_RW_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) }, -#define RUNTIME_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) }, -#define RUNTIME_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, -#define RUNTIME_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) }, - -#define JVMCI_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) }, -#define JVMCI_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, -#define JVMCI_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) }, -#define JVMCI_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | 
Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, -#define JVMCI_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) }, -#define JVMCI_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) }, -#define JVMCI_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) }, +#define JVMCI_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) }, +#define JVMCI_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, +#define JVMCI_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) }, +#define JVMCI_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) }, +#define JVMCI_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) }, +#define JVMCI_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, +#define JVMCI_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) }, #ifdef _LP64 -#define 
RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) }, +#define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) }, #else #define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) /* flag is constant */ #endif // _LP64 -#define C1_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) }, -#define C1_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, -#define C1_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) }, -#define C1_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) }, -#define C1_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, -#define C1_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) }, +#define C1_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) }, +#define C1_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, +#define C1_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) 
{ #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) }, +#define C1_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) }, +#define C1_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, +#define C1_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) }, -#define C2_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) }, -#define C2_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, -#define C2_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) }, -#define C2_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) }, -#define C2_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) }, -#define C2_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, -#define C2_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) 
}, +#define C2_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) }, +#define C2_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, +#define C2_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) }, +#define C2_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) }, +#define C2_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) }, +#define C2_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, +#define C2_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) }, -#define ARCH_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) }, -#define ARCH_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) }, -#define ARCH_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) }, -#define ARCH_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) 
Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) }, -#define ARCH_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) }, +#define ARCH_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) }, +#define ARCH_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) }, +#define ARCH_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) }, +#define ARCH_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) }, +#define ARCH_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) }, -#define SHARK_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) }, -#define SHARK_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, -#define SHARK_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) }, -#define SHARK_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) }, -#define SHARK_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { 
#type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, -#define SHARK_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) }, +#define SHARK_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) }, +#define SHARK_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) }, +#define SHARK_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) }, +#define SHARK_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) }, +#define SHARK_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) }, +#define SHARK_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) }, static Flag flagTable[] = { RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, \ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/globals.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -1126,9 +1126,6 @@ diagnostic(bool, PrintNMethods, false, \ "Print assembly code for nmethods when generated") \ \ - diagnostic(intx, PrintNMethodsAtLevel, -1, \ - "Only print 
code for nmethods at the given compilation level") \ - \ diagnostic(bool, PrintNativeNMethods, false, \ "Print assembly code for native nmethods when generated") \ \ @@ -3554,7 +3551,7 @@ \ product_pd(intx, CompilerThreadStackSize, \ "Compiler Thread Stack Size (in Kbytes)") \ - range(0, max_intx) \ + range(0, max_intx /(1 * K)) \ \ develop_pd(size_t, JVMInvokeMethodSlack, \ "Stack space (bytes) required for JVM_InvokeMethod to complete") \ @@ -4283,9 +4280,9 @@ #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc) extern "C" type name; #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc) extern "C" type name; #ifdef PRODUCT -#define DECLARE_DEVELOPER_FLAG(type, name, value, doc) extern "C" type CONST_##name; const type name = value; -#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc) extern "C" type CONST_##name; const type name = pd_##name; -#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc) extern "C" type CONST_##name; +#define DECLARE_DEVELOPER_FLAG(type, name, value, doc) const type name = value; +#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc) const type name = pd_##name; +#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc) const type name = value; #else #define DECLARE_DEVELOPER_FLAG(type, name, value, doc) extern "C" type name; #define DECLARE_PD_DEVELOPER_FLAG(type, name, doc) extern "C" type name; @@ -4306,9 +4303,9 @@ #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc) type name = value; #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc) type name = value; #ifdef PRODUCT -#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc) type CONST_##name = value; -#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc) type CONST_##name = pd_##name; -#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc) type CONST_##name = value; +#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc) +#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc) +#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc) #else #define 
MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc) type name = value; #define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc) type name = pd_##name; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/init.cpp --- a/hotspot/src/share/vm/runtime/init.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/init.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -72,6 +72,7 @@ void InlineCacheBuffer_init(); void compilerOracle_init(); bool compileBroker_init(); +void dependencyContext_init(); // Initialization after compiler initialization bool universe_post_init(); // must happen after compiler_init @@ -131,6 +132,8 @@ vtableStubs_init(); InlineCacheBuffer_init(); compilerOracle_init(); + dependencyContext_init(); + if (!compileBroker_init()) { return JNI_EINVAL; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/perfData.hpp --- a/hotspot/src/share/vm/runtime/perfData.hpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/perfData.hpp Tue Nov 24 10:30:23 2015 +0100 @@ -424,6 +424,7 @@ public: inline void inc() { (*(jlong*)_valuep)++; } inline void inc(jlong val) { (*(jlong*)_valuep) += val; } + inline void dec(jlong val) { inc(-val); } inline void add(jlong val) { (*(jlong*)_valuep) += val; } void clear_sample_helper() { _sample_helper = NULL; } }; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/sweeper.cpp --- a/hotspot/src/share/vm/runtime/sweeper.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/sweeper.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -297,7 +297,7 @@ void NMethodSweeper::handle_safepoint_request() { if (SafepointSynchronize::is_synchronizing()) { if (PrintMethodFlushing && Verbose) { - tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nof_nmethods()); + tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nmethod_count()); } MutexUnlockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); @@ -401,7 +401,7 
@@ int flushed_c2_count = 0; if (PrintMethodFlushing && Verbose) { - tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nof_nmethods()); + tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nmethod_count()); } int swept_count = 0; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/runtime/vmStructs.cpp --- a/hotspot/src/share/vm/runtime/vmStructs.cpp Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -343,10 +343,6 @@ nonstatic_field(InstanceKlass, _methods_jmethod_ids, jmethodID*) \ volatile_nonstatic_field(InstanceKlass, _idnum_allocated_count, u2) \ nonstatic_field(InstanceKlass, _annotations, Annotations*) \ - nonstatic_field(InstanceKlass, _dependencies, nmethodBucket*) \ - nonstatic_field(nmethodBucket, _nmethod, nmethod*) \ - nonstatic_field(nmethodBucket, _count, int) \ - nonstatic_field(nmethodBucket, _next, nmethodBucket*) \ nonstatic_field(InstanceKlass, _method_ordering, Array*) \ nonstatic_field(InstanceKlass, _default_vtable_indices, Array*) \ nonstatic_field(Klass, _super_check_offset, juint) \ @@ -969,6 +965,7 @@ nonstatic_field(Deoptimization::UnrollBlock, _caller_adjustment, int) \ nonstatic_field(Deoptimization::UnrollBlock, _number_of_frames, int) \ nonstatic_field(Deoptimization::UnrollBlock, _total_frame_sizes, int) \ + nonstatic_field(Deoptimization::UnrollBlock, _unpack_kind, int) \ nonstatic_field(Deoptimization::UnrollBlock, _frame_sizes, intptr_t*) \ nonstatic_field(Deoptimization::UnrollBlock, _frame_pcs, address*) \ nonstatic_field(Deoptimization::UnrollBlock, _register_block, intptr_t*) \ @@ -1550,7 +1547,6 @@ declare_toplevel_type(volatile Metadata*) \ \ declare_toplevel_type(DataLayout) \ - declare_toplevel_type(nmethodBucket) \ \ /********/ \ /* Oops */ \ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/src/share/vm/shark/sharkRuntime.cpp --- a/hotspot/src/share/vm/shark/sharkRuntime.cpp Tue Nov 17 16:40:52 2015 -0500 +++ 
b/hotspot/src/share/vm/shark/sharkRuntime.cpp Tue Nov 24 10:30:23 2015 +0100 @@ -213,8 +213,9 @@ // Initiate the trap thread->set_last_Java_frame(); Deoptimization::UnrollBlock *urb = - Deoptimization::uncommon_trap(thread, trap_request); + Deoptimization::uncommon_trap(thread, trap_request, Deoptimization::Unpack_uncommon_trap); thread->reset_last_Java_frame(); + assert(urb->unpack_kind() == Deoptimization::Unpack_uncommon_trap, "expected Unpack_uncommon_trap"); // Pop our dummy frame and the frame being deoptimized thread->pop_zero_frame(); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java --- a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,7 +25,7 @@ * @test * @bug 8072016 * @summary Infinite deoptimization/recompilation cycles in case of arraycopy with tightly coupled allocation - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.base/sun.misc * java.management * @build TestArrayCopyNoInitDeopt @@ -42,6 +42,7 @@ import sun.hotspot.code.NMethod; import jdk.test.lib.Platform; import java.lang.reflect.*; +import compiler.whitebox.CompilerWhiteBoxTest; public class TestArrayCopyNoInitDeopt { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/floatingpoint/TestPow2.java --- a/hotspot/test/compiler/floatingpoint/TestPow2.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/floatingpoint/TestPow2.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,7 +25,7 @@ * @test * @bug 8063086 * @summary X^2 special case for C2 yields different result than interpreter - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.management * @build TestPow2 * @run main ClassFileInstaller sun.hotspot.WhiteBox @@ -36,6 +36,7 @@ import 
java.lang.reflect.*; import sun.hotspot.WhiteBox; +import compiler.whitebox.CompilerWhiteBoxTest; public class TestPow2 { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/inlining/InlineAccessors.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/inlining/InlineAccessors.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ +/** + * @test + * @bug 8140650 + * @summary Method::is_accessor should cover getters and setters for all types + * @library /testlibrary + * @run main/othervm InlineAccessors + */ +import java.lang.invoke.*; +import jdk.test.lib.*; +import static jdk.test.lib.Asserts.*; + +public class InlineAccessors { + public static void main(String[] args) throws Exception { + // try some sanity checks first + doTest(); + + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( + "-XX:+IgnoreUnrecognizedVMOptions", "-showversion", + "-server", "-XX:-TieredCompilation", "-Xbatch", "-Xcomp", + "-XX:+PrintCompilation", "-XX:+UnlockDiagnosticVMOptions", "-XX:+PrintInlining", + "InlineAccessors$Launcher"); + + OutputAnalyzer analyzer = new OutputAnalyzer(pb.start()); + + analyzer.shouldHaveExitValue(0); + + // The test is applicable only to C2 (present in Server VM). + if (analyzer.getStderr().contains("Server VM")) { + analyzer.shouldContain("InlineAccessors::setBool (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setByte (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setChar (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setShort (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setInt (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setFloat (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setLong (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setDouble (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setObject (6 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::setArray (6 bytes) accessor"); + + analyzer.shouldContain("InlineAccessors::getBool (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getByte (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getChar (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getShort (5 bytes) accessor"); + 
analyzer.shouldContain("InlineAccessors::getInt (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getFloat (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getLong (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getDouble (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getObject (5 bytes) accessor"); + analyzer.shouldContain("InlineAccessors::getArray (5 bytes) accessor"); + } + } + + boolean bool; + byte b; + char c; + short s; + int i; + float f; + long l; + double d; + Object o; + Object[] a; + + public void setBool(boolean v) { bool = v; } + public void setByte(byte v) { b = v; } + public void setChar(char v) { c = v; } + public void setShort(short v) { s = v; } + public void setInt(int v) { i = v; } + public void setFloat(float v) { f = v; } + public void setLong(long v) { l = v; } + public void setDouble(double v) { d = v; } + public void setObject(Object v) { o = v; } + public void setArray(Object[] v) { a = v; } + + public boolean getBool() { return bool; } + public byte getByte() { return b; } + public char getChar() { return c; } + public short getShort() { return s; } + public int getInt() { return i; } + public float getFloat() { return f; } + public long getLong() { return l; } + public double getDouble() { return d; } + public Object getObject() { return o; } + public Object[] getArray() { return a; } + + static void doTest() { + InlineAccessors o = new InlineAccessors(); + o.setBool(false); + o.setByte((byte)0); + o.setChar('a'); + o.setShort((short)0); + o.setInt(0); + o.setFloat(0F); + o.setLong(0L); + o.setDouble(0D); + o.setObject(new Object()); + o.setArray(new Object[1]); + + o.getBool(); + o.getByte(); + o.getChar(); + o.getShort(); + o.getInt(); + o.getFloat(); + o.getLong(); + o.getDouble(); + o.getObject(); + o.getArray(); + } + + static class Launcher { + public static void main(String[] args) throws Exception { + for (int c = 0; c < 20_000; c++) { + doTest(); + } + } + } +} 
diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java --- a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,10 +23,11 @@ import java.lang.reflect.Executable; import java.util.concurrent.Callable; import java.util.Objects; +import compiler.whitebox.CompilerWhiteBoxTest; /* * @test * @bug 8130832 - * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary / * @build IntrinsicAvailableTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/TestAndnI.java --- a/hotspot/test/compiler/intrinsics/bmi/TestAndnI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/TestAndnI.java Tue Nov 24 10:30:23 2015 +0100 @@ -61,15 +61,27 @@ } public int intExpr(int src1, Expr.MemI src2) { - return ~src1 & src2.value; + if (src2 != null) { + return ~src1 & src2.value; + } else { + return 0; + } } public int intExpr(Expr.MemI src1, int src2) { - return ~src1.value & src2; + if (src1 != null) { + return ~src1.value & src2; + } else { + return 0; + } } public int intExpr(Expr.MemI src1, Expr.MemI src2) { - return ~src1.value & src2.value; + if (src1 != null && src2 != null) { + return ~src1.value & src2.value; + } else { + return 0; + } } } @@ -80,15 +92,27 @@ } public int intExpr(int src1, Expr.MemI src2) { - return src1 & ~src2.value; + if (src2 != null) { + return src1 & ~src2.value; + } else { + return 0; + } } public int intExpr(Expr.MemI src1, int src2) { - return src1.value & ~src2; + if (src1 != null) { + return src1.value & ~src2; + } else { + return 0; + } } public int intExpr(Expr.MemI src1, Expr.MemI src2) { - return src1.value & ~src2.value; + if (src1 != 
null && src2 != null) { + return src1.value & ~src2.value; + } else { + return 0; + } } } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/TestAndnL.java --- a/hotspot/test/compiler/intrinsics/bmi/TestAndnL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/TestAndnL.java Tue Nov 24 10:30:23 2015 +0100 @@ -61,15 +61,27 @@ } public long longExpr(long src1, Expr.MemL src2) { - return ~src1 & src2.value; + if (src2 != null) { + return ~src1 & src2.value; + } else { + return 0; + } } public long longExpr(Expr.MemL src1, long src2) { - return ~src1.value & src2; + if (src1 != null) { + return ~src1.value & src2; + } else { + return 0; + } } public long longExpr(Expr.MemL src1, Expr.MemL src2) { - return ~src1.value & src2.value; + if (src1 != null && src2 != null) { + return ~src1.value & src2.value; + } else { + return 0; + } } @@ -82,15 +94,27 @@ } public long longExpr(long src1, Expr.MemL src2) { - return src1 & ~src2.value; + if (src2 != null) { + return src1 & ~src2.value; + } else { + return 0; + } } public long longExpr(Expr.MemL src1, long src2) { - return src1.value & ~src2; + if (src1 != null) { + return src1.value & ~src2; + } else { + return 0; + } } public long longExpr(Expr.MemL src1, Expr.MemL src2) { - return src1.value & ~src2.value; + if (src1 != null && src2 != null) { + return src1.value & ~src2.value; + } else { + return 0; + } } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * @test - * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. - * @modules java.base/sun.misc - * java.management - * @build AddnTestI - * @run main ClassFileInstaller sun.hotspot.WhiteBox - * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. 
-Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AddnTestI - */ - -import java.lang.reflect.Method; - -public class AddnTestI extends BmiIntrinsicBase.BmiTestCase { - - protected AddnTestI(Method method) { - super(method); - // from intel manual VEX.NDS.LZ.0F38.W0 F2 /r, example c4e260f2c2 - instrMask = new byte[]{ - (byte) 0xFF, - (byte) 0x1F, - (byte) 0x00, - (byte) 0xFF}; - instrPattern = new byte[]{ - (byte) 0xC4, // prefix for 3-byte VEX instruction - (byte) 0x02, // 00010 implied 0F 38 leading opcode bytes - (byte) 0x00, - (byte) 0xF2}; - } - - public static void main(String[] args) throws Exception { - BmiIntrinsicBase.verifyTestCase(AddnTestI::new, TestAndnI.AndnIExpr.class.getDeclaredMethods()); - BmiIntrinsicBase.verifyTestCase(AddnTestI::new, TestAndnI.AndnICommutativeExpr.class.getDeclaredMethods()); - } -} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * @test - * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. - * @modules java.base/sun.misc - * java.management - * @build AddnTestL - * @run main ClassFileInstaller sun.hotspot.WhiteBox - * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AddnTestL - */ - -import java.lang.reflect.Method; - -public class AddnTestL extends AddnTestI { - - protected AddnTestL(Method method) { - super(method); - isLongOperation = true; - } - - public static void main(String[] args) throws Exception { - BmiIntrinsicBase.verifyTestCase(AddnTestL::new, TestAndnL.AndnLExpr.class.getDeclaredMethods()); - BmiIntrinsicBase.verifyTestCase(AddnTestL::new, TestAndnL.AndnLCommutativeExpr.class.getDeclaredMethods()); - } -} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8031321 + * @library /testlibrary /test/lib /compiler/whitebox / .. + * @modules java.base/sun.misc + * java.management + * @build AndnTestI + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/bootclasspath -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AndnTestI + */ + +import java.lang.reflect.Method; + +public class AndnTestI extends BmiIntrinsicBase.BmiTestCase { + + protected AndnTestI(Method method) { + super(method); + // from intel manual VEX.NDS.LZ.0F38.W0 F2 /r, example c4e260f2c2 + instrMask = new byte[]{ + (byte) 0xFF, + (byte) 0x1F, + (byte) 0x00, + (byte) 0xFF}; + instrPattern = new byte[]{ + (byte) 0xC4, // prefix for 3-byte VEX instruction + (byte) 0x02, // 00010 implied 0F 38 leading opcode bytes + (byte) 0x00, + (byte) 0xF2}; + } + + public static void main(String[] args) throws Exception { + BmiIntrinsicBase.verifyTestCase(AndnTestI::new, TestAndnI.AndnIExpr.class.getDeclaredMethods()); + BmiIntrinsicBase.verifyTestCase(AndnTestI::new, TestAndnI.AndnICommutativeExpr.class.getDeclaredMethods()); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 
hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8031321 + * @library /testlibrary /test/lib /compiler/whitebox / .. 
+ * @modules java.base/sun.misc + * java.management + * @build AndnTestL + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/bootclasspath -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AndnTestL + */ + +import java.lang.reflect.Method; + +public class AndnTestL extends AndnTestI { + + protected AndnTestL(Method method) { + super(method); + isLongOperation = true; + } + + public static void main(String[] args) throws Exception { + BmiIntrinsicBase.verifyTestCase(AndnTestL::new, TestAndnL.AndnLExpr.class.getDeclaredMethods()); + BmiIntrinsicBase.verifyTestCase(AndnTestL::new, TestAndnL.AndnLCommutativeExpr.class.getDeclaredMethods()); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build BlsiTestI diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. 
* @modules java.base/sun.misc * java.management * @build BlsiTestL diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build BlsmskTestI diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build BlsmskTestL diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. 
* @modules java.base/sun.misc * java.management * @build BlsrTestI diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build BlsrTestL diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java Tue Nov 24 10:30:23 2015 +0100 @@ -32,6 +32,7 @@ import java.lang.reflect.Method; import java.util.concurrent.Callable; import java.util.function.Function; +import compiler.whitebox.CompilerWhiteBoxTest; public class BmiIntrinsicBase extends CompilerWhiteBoxTest { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build LZcntTestI @@ -50,6 +50,8 @@ public static void main(String[] args) throws Exception { // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods System.out.println("class java.lang.Integer should be loaded. Proof: " + Integer.class); + // Avoid uncommon traps. 
+ System.out.println("Num leading zeroes: " + new TestLzcntI.LzcntIExpr().intExpr(12341341)); BmiIntrinsicBase.verifyTestCase(LZcntTestI::new, TestLzcntI.LzcntIExpr.class.getDeclaredMethods()); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build LZcntTestL @@ -46,6 +46,8 @@ public static void main(String[] args) throws Exception { // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods System.out.println("classes java.lang.Long should be loaded. Proof: " + Long.class); + // Avoid uncommon traps. + System.out.println("Num leading zeroes: " + new TestLzcntL.LzcntLExpr().longExpr(12341341)); BmiIntrinsicBase.verifyTestCase(LZcntTestL::new, TestLzcntL.LzcntLExpr.class.getDeclaredMethods()); } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build TZcntTestI @@ -50,6 +50,8 @@ public static void main(String[] args) throws Exception { // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods System.out.println("class java.lang.Integer should be loaded. 
Proof: " + Integer.class); + // Avoid uncommon traps. + System.out.println("Num trailing zeroes: " + new TestTzcntI.TzcntIExpr().intExpr(12341341)); BmiIntrinsicBase.verifyTestCase(TZcntTestI::new, TestTzcntI.TzcntIExpr.class.getDeclaredMethods()); } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java --- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test * @bug 8031321 - * @library /testlibrary /test/lib /compiler/whitebox .. + * @library /testlibrary /test/lib /compiler/whitebox / .. * @modules java.base/sun.misc * java.management * @build TZcntTestL @@ -46,6 +46,8 @@ public static void main(String[] args) throws Exception { // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods System.out.println("classes java.lang.Long should be loaded. Proof: " + Long.class); + // Avoid uncommon traps. + System.out.println("Num trailing zeroes: " + new TestTzcntL.TzcntLExpr().longExpr(12341341)); BmiIntrinsicBase.verifyTestCase(TZcntTestL::new, TestTzcntL.TzcntLExpr.class.getDeclaredMethods()); } } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/crc32/TestCRC32.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/crc32/TestCRC32.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8143012 + * @summary CRC32 Intrinsics support on SPARC + * + * @run main/othervm/timeout=720 -Xbatch TestCRC32 -m + */ + +import java.nio.ByteBuffer; +import java.util.zip.Checksum; +import java.util.zip.CRC32; + +public class TestCRC32 { + public static void main(String[] args) { + int offset = Integer.getInteger("offset", 0); + int msgSize = Integer.getInteger("msgSize", 512); + boolean multi = false; + int iters = 20000; + int warmupIters = 20000; + + if (args.length > 0) { + if (args[0].equals("-m")) { + multi = true; + } else { + iters = Integer.valueOf(args[0]); + } + if (args.length > 1) { + warmupIters = Integer.valueOf(args[1]); + } + } + + if (multi) { + test_multi(warmupIters); + return; + } + + System.out.println(" offset = " + offset); + System.out.println("msgSize = " + msgSize + " bytes"); + System.out.println(" iters = " + iters); + + byte[] b = initializedBytes(msgSize, offset); + + CRC32 crc0 = new CRC32(); + CRC32 crc1 = new CRC32(); + CRC32 crc2 = new CRC32(); + + crc0.update(b, offset, msgSize); + + System.out.println("-------------------------------------------------------"); + + /* warm up */ + for (int i = 0; i < warmupIters; i++) { + crc1.reset(); + crc1.update(b, offset, msgSize); + } + + /* 
measure performance */ + long start = System.nanoTime(); + for (int i = 0; i < iters; i++) { + crc1.reset(); + crc1.update(b, offset, msgSize); + } + long end = System.nanoTime(); + double total = (double)(end - start)/1e9; // in seconds + double thruput = (double)msgSize*iters/1e6/total; // in MB/s + System.out.println("CRC32.update(byte[]) runtime = " + total + " seconds"); + System.out.println("CRC32.update(byte[]) throughput = " + thruput + " MB/s"); + + /* check correctness */ + for (int i = 0; i < iters; i++) { + crc1.reset(); + crc1.update(b, offset, msgSize); + if (!check(crc0, crc1)) break; + } + report("CRCs", crc0, crc1); + + System.out.println("-------------------------------------------------------"); + + ByteBuffer buf = ByteBuffer.allocateDirect(msgSize); + buf.put(b, offset, msgSize); + buf.flip(); + + /* warm up */ + for (int i = 0; i < warmupIters; i++) { + crc2.reset(); + crc2.update(buf); + buf.rewind(); + } + + /* measure performance */ + start = System.nanoTime(); + for (int i = 0; i < iters; i++) { + crc2.reset(); + crc2.update(buf); + buf.rewind(); + } + end = System.nanoTime(); + total = (double)(end - start)/1e9; // in seconds + thruput = (double)msgSize*iters/1e6/total; // in MB/s + System.out.println("CRC32.update(ByteBuffer) runtime = " + total + " seconds"); + System.out.println("CRC32.update(ByteBuffer) throughput = " + thruput + " MB/s"); + + /* check correctness */ + for (int i = 0; i < iters; i++) { + crc2.reset(); + crc2.update(buf); + buf.rewind(); + if (!check(crc0, crc2)) break; + } + report("CRCs", crc0, crc2); + + System.out.println("-------------------------------------------------------"); + } + + private static void report(String s, Checksum crc0, Checksum crc1) { + System.out.printf("%s: crc0 = %08x, crc1 = %08x\n", + s, crc0.getValue(), crc1.getValue()); + } + + private static boolean check(Checksum crc0, Checksum crc1) { + if (crc0.getValue() != crc1.getValue()) { + System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n", 
+ crc0.getValue(), crc1.getValue()); + return false; + } + return true; + } + + private static byte[] initializedBytes(int M, int offset) { + byte[] bytes = new byte[M + offset]; + for (int i = 0; i < offset; i++) { + bytes[i] = (byte) i; + } + for (int i = offset; i < bytes.length; i++) { + bytes[i] = (byte) (i - offset); + } + return bytes; + } + + private static void test_multi(int iters) { + int len1 = 8; // the 8B/iteration loop + int len2 = 32; // the 32B/iteration loop + int len3 = 4096; // the 4KB/iteration loop + + byte[] b = initializedBytes(len3*16, 0); + int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 }; + int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7, + len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7, + len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7, + len2, len2+1, len2+3, len2+5, len2+7, + len2*2, len2*4, len2*8, len2*16, len2*32, len2*64, + len3, len3+1, len3+3, len3+5, len3+7, + len3*2, len3*4, len3*8, + len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7, + len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7, + len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7, + len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3, + len1+len2+len3+5, len1+len2+len3+7, + (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3, + (len1+len2+len3)*2+5, (len1+len2+len3)*2+7, + (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3, + (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 }; + CRC32[] crc0 = new CRC32[offsets.length*sizes.length]; + CRC32[] crc1 = new CRC32[offsets.length*sizes.length]; + int i, j, k; + + System.out.printf("testing %d cases ...\n", offsets.length*sizes.length); + + /* set the result from interpreter as reference */ + for (i = 0; i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + crc0[i*sizes.length + j] = new CRC32(); + crc1[i*sizes.length + j] = new CRC32(); + crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]); + } + } + + /* warm up the JIT 
compiler and get result */ + for (k = 0; k < iters; k++) { + for (i = 0; i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + crc1[i*sizes.length + j].reset(); + crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]); + } + } + } + + /* check correctness */ + for (i = 0; i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) { + System.out.printf("offsets[%d] = %d", i, offsets[i]); + System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]); + } + } + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build AddExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build AddExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 +++ 
b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build DecrementExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build DecrementExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build IncrementExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library 
/testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build IncrementExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java Tue Nov 24 10:30:23 2015 +0100 @@ -27,6 +27,7 @@ import java.io.FileOutputStream; import java.lang.reflect.Executable; import java.util.Properties; +import compiler.whitebox.CompilerWhiteBoxTest; public abstract class IntrinsicBase extends CompilerWhiteBoxTest { protected String javaVmName; diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,6 +23,7 @@ import java.lang.reflect.Executable; import java.util.concurrent.Callable; +import compiler.whitebox.CompilerWhiteBoxTest; public class MathIntrinsic { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build MultiplyExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 
hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build MultiplyExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build NegateExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build NegateExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java Tue Nov 17 16:40:52 2015 -0500 
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build SubtractExactIntTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java --- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,8 +23,7 @@ /* * @test - * @library /testlibrary /test/lib /compiler/whitebox - * /compiler/testlibrary + * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary * @modules java.base/sun.misc * java.management * @build SubtractExactLongTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/unsafe/TestUnsafeMismatchedArrayFieldAccess.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/unsafe/TestUnsafeMismatchedArrayFieldAccess.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8142386 + * @library /testlibrary /test/lib + * @summary Unsafe access to an array is wrongly marked as mismatched + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-TieredCompilation TestUnsafeMismatchedArrayFieldAccess + * + */ + +import java.lang.reflect.*; +import jdk.test.lib.Utils; +import sun.misc.Unsafe; + +public class TestUnsafeMismatchedArrayFieldAccess { + + private static final Unsafe UNSAFE = Utils.getUnsafe(); + + static { + try { + array_offset = UNSAFE.objectFieldOffset(TestUnsafeMismatchedArrayFieldAccess.class.getDeclaredField("array")); + } + catch (Exception e) { + throw new AssertionError(e); + } + } + + int[] array; + static final long array_offset; + + void m() { + UNSAFE.getObject(this, array_offset); + } + + static public void main(String[] args) { + TestUnsafeMismatchedArrayFieldAccess test = new TestUnsafeMismatchedArrayFieldAccess(); + + for (int i = 0; i < 20000; i++) { + test.m(); + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/intrinsics/unsafe/TestUnsafeUnalignedMismatchedAccesses.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/unsafe/TestUnsafeUnalignedMismatchedAccesses.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8136473 + * @summary Mismatched stores on same slice possible with Unsafe.Put*Unaligned methods + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestUnsafeUnalignedMismatchedAccesses + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+UnlockDiagnosticVMOptions -XX:-UseUnalignedAccesses TestUnsafeUnalignedMismatchedAccesses + * + */ + +import java.lang.reflect.*; +import jdk.internal.misc.Unsafe; + +public class TestUnsafeUnalignedMismatchedAccesses { + + private static final Unsafe UNSAFE; + + static { + try { + Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + UNSAFE = (Unsafe) unsafeField.get(null); + } + catch (Exception e) { + throw new AssertionError(e); + } + } + + static void test1(byte[] array) { + array[0] = 0; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0); + array[0] = 0; + } + + static void test2(byte[] array) { + array[0] = 0; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, 
0); + array[0] = 0; + } + + static void test3(byte[] array) { + array[0] = 0; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+2, 0); + array[0] = 0; + } + + static void test4(byte[] array) { + array[0] = 0; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+3, 0); + array[0] = 0; + } + + static void test5(byte[] array) { + array[0] = 0; + UNSAFE.putInt(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0); + array[0] = 0; + } + + // unaligned access and non escaping allocation + static void test6() { + byte[] array = new byte[10]; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1); + array[0] = 0; + } + + // unaligned access and non escaping allocation + static int test7() { + byte[] array = new byte[10]; + UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1); + array[0] = 0; + array[2] = 0; + return array[0] + array[1] + array[2] + array[3] + array[4]; + } + + // unaligned access with vectorization + static void test8(int[] src1, int[] src2, int[] dst) { + for (int i = 0; i < dst.length-1; i++) { + int res = src1[i] + src2[i]; + UNSAFE.putIntUnaligned(dst, UNSAFE.ARRAY_INT_BASE_OFFSET + i*4+1, res); + } + } + + static public void main(String[] args) throws Exception { + byte[] byte_array = new byte[100]; + int[] int_array = new int[100]; + Object[] obj_array = new Object[100]; + TestUnsafeUnalignedMismatchedAccesses test = new TestUnsafeUnalignedMismatchedAccesses(); + for (int i = 0; i < 20000; i++) { + test1(byte_array); + test2(byte_array); + test3(byte_array); + test4(byte_array); + test5(byte_array); + test6(); + test7(); + test8(int_array, int_array, int_array); + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java --- a/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -34,7 +34,6 @@ * @run 
main/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:. * -XX:-BackgroundCompilation - -XX:+LogCompilation * compiler.jvmci.compilerToVM.AllocateCompileIdTest */ @@ -45,22 +44,21 @@ import java.lang.reflect.Executable; import java.lang.reflect.Method; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.HashSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; -import compiler.jvmci.common.testcases.TestCase; import jdk.vm.ci.hotspot.CompilerToVMHelper; import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; import jdk.test.lib.Asserts; import jdk.test.lib.Pair; import jdk.test.lib.Utils; -import sun.hotspot.WhiteBox; import sun.hotspot.code.NMethod; public class AllocateCompileIdTest { + private static final int SOME_REPEAT_VALUE = 5; private final HashSet ids = new HashSet<>(); public static void main(String[] args) { @@ -69,7 +67,6 @@ createTestCasesIncorrectBci().forEach(test::runSanityIncorrectTest); } - private static List createTestCasesCorrectBci() { List result = new ArrayList<>(); try { @@ -84,29 +81,29 @@ return result; } - private static List>> createTestCasesIncorrectBci() { List>> result = new ArrayList<>(); - try { Class aClass = DummyClass.class; Object receiver = new DummyClass(); Method method = aClass.getMethod("dummyInstanceFunction"); // greater than bytecode.length - int[] bcis = new int[] {30, 50, 200}; - for (int bci : bcis) { - result.add(new Pair<>( - new CompileCodeTestCase(receiver, method, bci), - IllegalArgumentException.class)); - } - bcis = new int[] {-4, -50, -200}; - for (int bci : bcis) { - result.add(new Pair<>( - new CompileCodeTestCase(receiver, method, bci), - IllegalArgumentException.class)); - } + byte[] bytecode = CompilerToVMHelper.getBytecode(CTVMUtilities + .getResolvedMethod(method)); + Stream.of( + // greater than bytecode.length + bytecode.length + 4, + 
bytecode.length + 50, + bytecode.length + 200, + // negative cases + -4, -50, -200) + .map(bci -> new Pair>( + new CompileCodeTestCase(receiver, method, bci), + IllegalArgumentException.class)) + .collect(Collectors.toList()); } catch (NoSuchMethodException e) { throw new Error("TEST BUG : " + e.getMessage(), e); } @@ -117,27 +114,20 @@ System.out.println(testCase); Executable aMethod = testCase.executable; // to generate ciTypeFlow - System.out.println(testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes()))); + testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes())); int bci = testCase.bci; HotSpotResolvedJavaMethod method = CTVMUtilities .getResolvedMethod(aMethod); - int wbCompileID = getWBCompileID(testCase); - int id = CompilerToVMHelper.allocateCompileId(method, bci); - Asserts.assertNE(id, 0, testCase + " : zero compile id"); - - if (wbCompileID > 0) { + for (int i = 0; i < SOME_REPEAT_VALUE; ++i) { + int wbCompileID = getWBCompileID(testCase); + int id = CompilerToVMHelper.allocateCompileId(method, bci); + Asserts.assertNE(id, 0, testCase + " : zero compile id"); Asserts.assertGT(id, wbCompileID, testCase + " : allocated 'compile id' not greater than existed"); - if (!ids.add(wbCompileID)) { - throw new AssertionError(String.format( - "%s : vm compilation allocated existed id -- %d", - testCase, id)); - } - } - if (!ids.add(id)) { - throw new AssertionError(String.format( - "%s : allocateCompileId returned existed id %d", - testCase, id)); + Asserts.assertTrue(ids.add(wbCompileID), testCase + + " : vm compilation allocated existing id " + id); + Asserts.assertTrue(ids.add(id), testCase + + " : allocateCompileId returned existing id " + id); } } @@ -156,8 +146,8 @@ private int getWBCompileID(CompileCodeTestCase testCase) { NMethod nm = testCase.deoptimizeAndCompile(); - if (nm == null) { - throw new Error("[TEST BUG] cannot compile method " + testCase); + if (nm == null || nm.compile_id <= 0) { + throw new Error("TEST BUG : cannot compile 
method " + testCase); } return nm.compile_id; } diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java --- a/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -45,18 +45,15 @@ import java.lang.reflect.Method; import java.util.ArrayList; import java.util.List; -import java.util.Random; + +import compiler.whitebox.CompilerWhiteBoxTest; import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; import jdk.vm.ci.hotspot.CompilerToVMHelper; import jdk.vm.ci.meta.ProfilingInfo; import jdk.test.lib.Asserts; -import jdk.test.lib.Utils; -import sun.hotspot.WhiteBox; public class ReprofileTest { - private static final WhiteBox WB = WhiteBox.getWhiteBox(); - public static void main(String[] args) { List testCases = createTestCases(); testCases.forEach(ReprofileTest::runSanityTest); @@ -67,10 +64,10 @@ try { Class aClass = DummyClass.class; - testCases.add(aClass.getMethod("withLoop")); + testCases.add(aClass.getMethod("dummyInstanceFunction")); aClass = DummyClass.class; - testCases.add(aClass.getDeclaredMethod("dummyFunction")); + testCases.add(aClass.getMethod("dummyFunction")); } catch (NoSuchMethodException e) { throw new Error("TEST BUG " + e.getMessage(), e); } @@ -78,17 +75,17 @@ } private static void runSanityTest(Method aMethod) { + System.out.println(aMethod); HotSpotResolvedJavaMethod method = CTVMUtilities .getResolvedMethod(aMethod); ProfilingInfo startProfile = method.getProfilingInfo(); Asserts.assertFalse(startProfile.isMature(), aMethod - + " : profiling info is mature in the begging"); + + " : profiling info is mature in the beginning"); - long compileThreshold = (Long) WB.getVMFlag("CompileThreshold"); // make interpreter to profile this method try { Object obj = aMethod.getDeclaringClass().newInstance(); - for (long i = 0; i < compileThreshold; i++) { + for (long i = 0; i < 
CompilerWhiteBoxTest.THRESHOLD; i++) { aMethod.invoke(obj); } } catch (ReflectiveOperationException e) { @@ -99,10 +96,10 @@ Asserts.assertNE(startProfile.toString(), compProfile.toString(), String.format("%s : profiling info wasn't changed after " + "%d invocations", - aMethod, compileThreshold)); + aMethod, CompilerWhiteBoxTest.THRESHOLD)); Asserts.assertTrue(compProfile.isMature(), String.format("%s is not mature after %d invocations", - aMethod, compileThreshold)); + aMethod, CompilerWhiteBoxTest.THRESHOLD)); CompilerToVMHelper.reprofile(method); ProfilingInfo reprofiledProfile = method.getProfilingInfo(); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/errors/CodeInstallerTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/jvmci/errors/CodeInstallerTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package compiler.jvmci.errors; + +import java.lang.reflect.Method; + +import jdk.vm.ci.code.Architecture; +import jdk.vm.ci.code.CodeCacheProvider; +import jdk.vm.ci.code.CompilationResult; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.meta.MetaAccessProvider; +import jdk.vm.ci.meta.PlatformKind; +import jdk.vm.ci.meta.ResolvedJavaMethod; +import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; +import jdk.vm.ci.runtime.JVMCI; +import jdk.vm.ci.runtime.JVMCIBackend; + +import org.junit.Assert; + +public class CodeInstallerTest { + + protected final Architecture arch; + protected final CodeCacheProvider codeCache; + protected final MetaAccessProvider metaAccess; + protected final HotSpotConstantReflectionProvider constantReflection; + + protected final ResolvedJavaMethod dummyMethod; + + public static void dummyMethod() { + } + + protected CodeInstallerTest() { + JVMCIBackend backend = JVMCI.getRuntime().getHostJVMCIBackend(); + metaAccess = backend.getMetaAccess(); + codeCache = backend.getCodeCache(); + constantReflection = (HotSpotConstantReflectionProvider) backend.getConstantReflection(); + arch = codeCache.getTarget().arch; + + Method method = null; + try { + method = CodeInstallerTest.class.getMethod("dummyMethod"); + } catch (NoSuchMethodException e) { + Assert.fail(); + } + + dummyMethod = metaAccess.lookupJavaMethod(method); + } + + protected void installCode(CompilationResult result) { + codeCache.addCode(dummyMethod, result, null, null); + } + + protected CompilationResult createEmptyCompilationResult() { + CompilationResult ret = new CompilationResult(); + ret.setTotalFrameSize(0); + return ret; + } + + protected Register getRegister(PlatformKind kind, int index) { + Register[] allRegs = arch.getAvailableValueRegisters(); + for (int i = 0; i < allRegs.length; i++) { + if (arch.canStoreValue(allRegs[i].getRegisterCategory(), kind)) { + if (index-- == 0) { + return allRegs[i]; + } + } + } + return null; + } +} diff -r 6efbc7ffd767 -r 
d25c2fc1e248 hotspot/test/compiler/jvmci/errors/TestInvalidCompilationResult.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/jvmci/errors/TestInvalidCompilationResult.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64" + * @compile CodeInstallerTest.java + * @run junit/othervm -da:jdk.vm.ci... 
-XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidCompilationResult + */ + +package compiler.jvmci.errors; + +import static jdk.vm.ci.code.CompilationResult.ConstantReference; +import static jdk.vm.ci.code.CompilationResult.DataPatch; +import static jdk.vm.ci.code.CompilationResult.DataSectionReference; +import static jdk.vm.ci.code.CompilationResult.Infopoint; +import static jdk.vm.ci.code.CompilationResult.Reference; +import static jdk.vm.ci.code.DataSection.Data; +import static jdk.vm.ci.code.DataSection.DataBuilder; +import static jdk.vm.ci.meta.Assumptions.Assumption; + +import jdk.vm.ci.code.CompilationResult; +import jdk.vm.ci.code.InfopointReason; +import jdk.vm.ci.common.JVMCIError; +import jdk.vm.ci.hotspot.HotSpotConstant; +import jdk.vm.ci.meta.ResolvedJavaType; +import jdk.vm.ci.meta.VMConstant; + +import org.junit.Test; + +/** + * Tests for errors in the code installer. + */ +public class TestInvalidCompilationResult extends CodeInstallerTest { + + private static class InvalidAssumption extends Assumption { + } + + private static class InvalidVMConstant implements VMConstant { + + public boolean isDefaultForKind() { + return false; + } + + public String toValueString() { + return null; + } + } + + private static class InvalidReference extends Reference { + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object obj) { + return false; + } + } + + @Test(expected = JVMCIError.class) + public void testInvalidAssumption() { + CompilationResult result = createEmptyCompilationResult(); + result.setAssumptions(new Assumption[]{new InvalidAssumption()}); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidAlignment() { + CompilationResult result = createEmptyCompilationResult(); + result.getDataSection().insertData(new Data(7, 1, DataBuilder.zero(1))); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void 
testNullDataPatchInDataSection() { + CompilationResult result = createEmptyCompilationResult(); + Data data = new Data(1, 1, (buffer, patch) -> { + patch.accept(null); + buffer.put((byte) 0); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testNullReferenceInDataSection() { + CompilationResult result = createEmptyCompilationResult(); + Data data = new Data(1, 1, (buffer, patch) -> { + patch.accept(new DataPatch(buffer.position(), null)); + buffer.put((byte) 0); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidDataSectionReference() { + CompilationResult result = createEmptyCompilationResult(); + DataSectionReference ref = result.getDataSection().insertData(new Data(1, 1, DataBuilder.zero(1))); + Data data = new Data(1, 1, (buffer, patch) -> { + patch.accept(new DataPatch(buffer.position(), ref)); + buffer.put((byte) 0); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidNarrowMethodInDataSection() { + CompilationResult result = createEmptyCompilationResult(); + HotSpotConstant c = (HotSpotConstant) dummyMethod.getEncoding(); + Data data = new Data(4, 4, (buffer, patch) -> { + patch.accept(new DataPatch(buffer.position(), new ConstantReference((VMConstant) c.compress()))); + buffer.putInt(0); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testNullConstantInDataSection() { + CompilationResult result = createEmptyCompilationResult(); + Data data = new Data(1, 1, (buffer, patch) -> { + patch.accept(new DataPatch(buffer.position(), new ConstantReference(null))); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void 
testInvalidConstantInDataSection() { + CompilationResult result = createEmptyCompilationResult(); + Data data = new Data(1, 1, (buffer, patch) -> { + patch.accept(new DataPatch(buffer.position(), new ConstantReference(new InvalidVMConstant()))); + }); + result.getDataSection().insertData(data); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testNullReferenceInCode() { + CompilationResult result = createEmptyCompilationResult(); + result.recordDataPatch(0, null); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testNullConstantInCode() { + CompilationResult result = createEmptyCompilationResult(); + result.recordDataPatch(0, new ConstantReference(null)); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidConstantInCode() { + CompilationResult result = createEmptyCompilationResult(); + result.recordDataPatch(0, new ConstantReference(new InvalidVMConstant())); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidReference() { + CompilationResult result = createEmptyCompilationResult(); + result.recordDataPatch(0, new InvalidReference()); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testOutOfBoundsDataSectionReference() { + CompilationResult result = createEmptyCompilationResult(); + DataSectionReference ref = new DataSectionReference(); + ref.setOffset(0x1000); + result.recordDataPatch(0, ref); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidMark() { + CompilationResult result = createEmptyCompilationResult(); + result.recordMark(0, new Object()); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInvalidMarkInt() { + CompilationResult result = createEmptyCompilationResult(); + result.recordMark(0, -1); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void 
testNullInfopoint() { + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(null); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testUnknownInfopointReason() { + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(new Infopoint(0, null, InfopointReason.UNKNOWN)); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testInfopointMissingDebugInfo() { + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(new Infopoint(0, null, InfopointReason.METHOD_START)); + installCode(result); + } + + @Test(expected = JVMCIError.class) + public void testSafepointMissingDebugInfo() { + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(new Infopoint(0, null, InfopointReason.SAFEPOINT)); + installCode(result); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/errors/TestInvalidDebugInfo.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/jvmci/errors/TestInvalidDebugInfo.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64" + * @compile CodeInstallerTest.java + * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidDebugInfo + */ + +package compiler.jvmci.errors; + +import static jdk.vm.ci.code.CompilationResult.Infopoint; + +import jdk.vm.ci.code.BytecodeFrame; +import jdk.vm.ci.code.CompilationResult; +import jdk.vm.ci.code.DebugInfo; +import jdk.vm.ci.code.InfopointReason; +import jdk.vm.ci.code.Location; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.code.StackSlot; +import jdk.vm.ci.code.VirtualObject; +import jdk.vm.ci.hotspot.HotSpotReferenceMap; +import jdk.vm.ci.meta.JavaConstant; +import jdk.vm.ci.meta.JavaKind; +import jdk.vm.ci.meta.JavaValue; +import jdk.vm.ci.meta.LIRKind; +import jdk.vm.ci.meta.ResolvedJavaType; +import jdk.vm.ci.meta.Value; +import jdk.vm.ci.common.JVMCIError; + +import org.junit.Test; + +/** + * Tests for errors in debug info. 
+ */ +public class TestInvalidDebugInfo extends CodeInstallerTest { + + private static class UnknownJavaValue implements JavaValue { + } + + private void test(JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) { + test(null, values, slotKinds, locals, stack, locks); + } + + private void test(VirtualObject[] vobj, JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) { + BytecodeFrame frame = new BytecodeFrame(null, dummyMethod, 0, false, false, values, slotKinds, locals, stack, locks); + DebugInfo info = new DebugInfo(frame, vobj); + info.setReferenceMap(new HotSpotReferenceMap(new Location[0], new Location[0], new int[0], 8)); + + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT)); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testNullValues() { + test(null, new JavaKind[0], 0, 0, 0); + } + + @Test(expected = NullPointerException.class) + public void testNullSlotKinds() { + test(new JavaValue[0], null, 0, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedScopeValuesLength() { + test(new JavaValue[]{JavaConstant.FALSE}, new JavaKind[0], 0, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedScopeSlotKindsLength() { + test(new JavaValue[0], new JavaKind[]{JavaKind.Boolean}, 0, 0, 0); + } + + @Test(expected = NullPointerException.class) + public void testNullValue() { + test(new JavaValue[]{null}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = NullPointerException.class) + public void testNullSlotKind() { + test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{null}, 1, 0, 0); + } + + @Test(expected = NullPointerException.class) + public void testNullMonitor() { + test(new JavaValue[]{null}, new JavaKind[0], 0, 0, 1); + } + + @Test(expected = JVMCIError.class) + public void testWrongMonitorType() { + test(new 
JavaValue[]{JavaConstant.INT_0}, new JavaKind[0], 0, 0, 1); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedIllegalValue() { + test(new JavaValue[]{Value.ILLEGAL}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedTypeInCPURegister() { + Register reg = getRegister(arch.getPlatformKind(JavaKind.Int), 0); + test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedTypeInFloatRegister() { + Register reg = getRegister(arch.getPlatformKind(JavaKind.Float), 0); + test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedTypeOnStack() { + LIRKind kind = codeCache.getTarget().getLIRKind(JavaKind.Int); + StackSlot value = StackSlot.get(kind, 8, false); + test(new JavaValue[]{value}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testWrongConstantType() { + test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{JavaKind.Object}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnsupportedConstantType() { + test(new JavaValue[]{JavaConstant.forShort((short) 0)}, new JavaKind[]{JavaKind.Short}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedNull() { + test(new JavaValue[]{JavaConstant.NULL_POINTER}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedObject() { + JavaValue wrapped = constantReflection.forObject(this); + test(new JavaValue[]{wrapped}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnknownJavaValue() { + test(new JavaValue[]{new UnknownJavaValue()}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void 
testMissingIllegalAfterDouble() { + test(new JavaValue[]{JavaConstant.DOUBLE_0, JavaConstant.INT_0}, new JavaKind[]{JavaKind.Double, JavaKind.Int}, 2, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testInvalidVirtualObjectId() { + ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class); + VirtualObject o = VirtualObject.get(obj, 5); + o.setValues(new JavaValue[0], new JavaKind[0]); + + test(new VirtualObject[]{o}, new JavaValue[0], new JavaKind[0], 0, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testDuplicateVirtualObject() { + ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class); + VirtualObject o1 = VirtualObject.get(obj, 0); + o1.setValues(new JavaValue[0], new JavaKind[0]); + + VirtualObject o2 = VirtualObject.get(obj, 0); + o2.setValues(new JavaValue[0], new JavaKind[0]); + + test(new VirtualObject[]{o1, o2}, new JavaValue[0], new JavaKind[0], 0, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUnexpectedVirtualObject() { + ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class); + VirtualObject o = VirtualObject.get(obj, 0); + o.setValues(new JavaValue[0], new JavaKind[0]); + + test(new VirtualObject[]{o}, new JavaValue[]{o}, new JavaKind[]{JavaKind.Int}, 1, 0, 0); + } + + @Test(expected = JVMCIError.class) + public void testUndefinedVirtualObject() { + ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class); + VirtualObject o0 = VirtualObject.get(obj, 0); + o0.setValues(new JavaValue[0], new JavaKind[0]); + + VirtualObject o1 = VirtualObject.get(obj, 1); + o1.setValues(new JavaValue[0], new JavaKind[0]); + + test(new VirtualObject[]{o0}, new JavaValue[]{o1}, new JavaKind[]{JavaKind.Object}, 1, 0, 0); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/errors/TestInvalidOopMap.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/jvmci/errors/TestInvalidOopMap.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,127 @@ +/* + * Copyright 
(c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64" + * @compile CodeInstallerTest.java + * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidOopMap + */ + +package compiler.jvmci.errors; + +import static jdk.vm.ci.code.CompilationResult.Infopoint; + +import jdk.vm.ci.code.BytecodePosition; +import jdk.vm.ci.code.CompilationResult; +import jdk.vm.ci.code.DebugInfo; +import jdk.vm.ci.code.InfopointReason; +import jdk.vm.ci.code.Location; +import jdk.vm.ci.code.ReferenceMap; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.hotspot.HotSpotReferenceMap; +import jdk.vm.ci.hotspot.HotSpotVMConfig; +import jdk.vm.ci.meta.JavaKind; +import jdk.vm.ci.meta.LIRKind; +import jdk.vm.ci.meta.PlatformKind; +import jdk.vm.ci.common.JVMCIError; + +import org.junit.Test; + +/** + * Tests for errors in oop maps. 
+ */ +public class TestInvalidOopMap extends CodeInstallerTest { + + private static class InvalidReferenceMap extends ReferenceMap { + } + + private void test(ReferenceMap refMap) { + BytecodePosition pos = new BytecodePosition(null, dummyMethod, 0); + DebugInfo info = new DebugInfo(pos); + info.setReferenceMap(refMap); + + CompilationResult result = createEmptyCompilationResult(); + result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT)); + installCode(result); + } + + @Test(expected = NullPointerException.class) + public void testMissingReferenceMap() { + test(null); + } + + @Test(expected = JVMCIError.class) + public void testInvalidReferenceMap() { + test(new InvalidReferenceMap()); + } + + @Test(expected = NullPointerException.class) + public void testNullOops() { + test(new HotSpotReferenceMap(null, new Location[0], new int[0], 8)); + } + + @Test(expected = NullPointerException.class) + public void testNullBase() { + test(new HotSpotReferenceMap(new Location[0], null, new int[0], 8)); + } + + @Test(expected = NullPointerException.class) + public void testNullSize() { + test(new HotSpotReferenceMap(new Location[0], new Location[0], null, 8)); + } + + @Test(expected = JVMCIError.class) + public void testInvalidLength() { + test(new HotSpotReferenceMap(new Location[1], new Location[2], new int[3], 8)); + } + + @Test(expected = JVMCIError.class) + public void testInvalidShortOop() { + PlatformKind kind = arch.getPlatformKind(JavaKind.Short); + Register reg = getRegister(kind, 0); + + Location[] oops = new Location[]{Location.register(reg)}; + Location[] base = new Location[]{null}; + int[] size = new int[]{kind.getSizeInBytes()}; + + test(new HotSpotReferenceMap(oops, base, size, 8)); + } + + @Test(expected = JVMCIError.class) + public void testInvalidNarrowDerivedOop() { + if (!HotSpotVMConfig.config().useCompressedOops) { + throw new JVMCIError("skipping test"); + } + + PlatformKind kind = arch.getPlatformKind(JavaKind.Int); + Register reg = 
getRegister(kind, 0); + Register baseReg = getRegister(arch.getPlatformKind(JavaKind.Object), 1); + + Location[] oops = new Location[]{Location.register(reg)}; + Location[] base = new Location[]{Location.register(baseReg)}; + int[] size = new int[]{kind.getSizeInBytes()}; + + test(new HotSpotReferenceMap(oops, base, size, 8)); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java --- a/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -111,6 +111,8 @@ Asserts.assertEQ(gotInstallNotification, 1, "Got unexpected event count after 1st install attempt"); // since "empty" compilation result is ok, a second attempt should be ok + compResult = new CompilationResult(METHOD_NAME); // create another instance with fresh state + compResult.setTotalFrameSize(0); codeCache.installCode(compRequest, compResult, /* installedCode = */ null, /* speculationLog = */ null, /* isDefault = */ false); Asserts.assertEQ(gotInstallNotification, 2, diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/loopopts/superword/TestBestAlign.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/loopopts/superword/TestBestAlign.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,56 @@ +/* + * Copyright 2015 SAP AG. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8141624 + * @summary Limit calculation of pre loop during super word optimization is wrong + * @run main/othervm TestBestAlign + * @author gunter.haug@sap.com + */ + +public class TestBestAlign { + + static final int initVal = -1; + static int intArray []; + static boolean boolArray[]; + static int limit; + static public void clear() { + for (int i = 0; i < limit; i++) { + boolArray[1] = true; + intArray[i] = initVal; + boolArray[2] = true; + } + } + + public static void main(String argv[]) throws Exception { + limit = 64; + boolArray = new boolean[8]; + intArray = new int[limit + 4]; + for (int i = 0; i < 10000000; ++i) { + if(i % 1000000 == 0) + System.out.println(i); + clear(); + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java --- a/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,7 +25,7 @@ * @test * @bug 8073480 * @summary explicit range checks should be recognized by C2 - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @build TestExplicitRangeChecks * @run main ClassFileInstaller sun.hotspot.WhiteBox * @run main ClassFileInstaller jdk.test.lib.Platform @@ -41,6 +41,7 @@ import sun.hotspot.code.NMethod; import jdk.test.lib.Platform; 
import sun.misc.Unsafe; +import compiler.whitebox.CompilerWhiteBoxTest; public class TestExplicitRangeChecks { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java --- a/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,7 +25,7 @@ * @test * @bug 8066103 * @summary C2's range check smearing allows out of bound array accesses - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.base/sun.misc * java.management * @build TestRangeCheckSmearing @@ -42,6 +42,7 @@ import sun.hotspot.WhiteBox; import sun.hotspot.code.NMethod; import jdk.test.lib.Platform; +import compiler.whitebox.CompilerWhiteBoxTest; public class TestRangeCheckSmearing { private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/rangechecks/TestUncommonTrapMerging.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/rangechecks/TestUncommonTrapMerging.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8140574 + * @summary Verify proper re-execution of checks after merging of uncommon traps + * @run main/othervm -Xcomp -XX:-TieredCompilation -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test1 + * @run main/othervm -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test2 + */ +public class TestUncommonTrapMerging { + + public static void main(String[] args) throws Throwable { + if (args.length < 1) { + throw new RuntimeException("Not enough arguments!"); + } + TestUncommonTrapMerging mytest = new TestUncommonTrapMerging(); + String testcase = args[0]; + if (testcase.equals("Test1")) { + try { + // '42' should hit the 'arg > 0' check + mytest.test(42); + + } catch (OutOfMemoryError e) { + // expected + } + } else if (testcase.equals("Test2")) { + // Compile test2 with uncommon traps at path 1 and path 2 + for (int i = 0; i < 100_000; i++) { + mytest.test2(-1, 0); + } + + // Compile test3 which inlines test2 with uncommon traps at + // path 1 and path 2. Because test3 always passes 'value = 1', + // C2 will remove the 'value > 0' check and then merge the two + // uncommon traps. 
+ for (int i = 0; i < 100_000; i++) { + mytest.test3(0); + } + + // This should return through path 2 + if (!mytest.test3(42)) { + throw new RuntimeException("test2 returned through wrong path!"); + } + } + } + + public void test(int arg) throws Throwable { + // The following two checks should not be merged if the + // uncommon trap of the dominating if has 'Reason_unloaded' + // because we need to re-execute both checks after deopt. + if (arg < 0) { + throw new RuntimeException("Should not reach here"); + } else if (arg > 0) { + throw new OutOfMemoryError(); + } + throw new RuntimeException("Should not reach here"); + } + + public boolean test2(int arg, int value) { + if (arg < 0) { + if (value > 0) { + // path 1 + return false; + } + } else if (arg > 0) { + // path 2 + return true; + } + // path 3 + return false; + } + + public boolean test3(int arg) { + int i; + for (i = 0; i < 1; ++i) { } + // i == 1 + return test2(arg, i); + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/stable/TestStableMemoryBarrier.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/stable/TestStableMemoryBarrier.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStableMemoryBarrier + * @bug 8139758 + * @summary tests memory barrier correctly inserted for stable fields + * @library /testlibrary /../../test/lib + * + * @run main/bootclasspath -Xcomp -XX:CompileOnly=::testCompile + * java.lang.invoke.TestStableMemoryBarrier + * + * @author hui.shi@linaro.org + */ +package java.lang.invoke; + +import java.lang.reflect.InvocationTargetException; + +public class TestStableMemoryBarrier { + + public static void main(String[] args) throws Exception { + run(NotDominate.class); + + } + + /* ==================================================== + * Stable field initialized in method, but its allocation + * doesn't dominate MemBar Release at the end of method. 
+ */ + + static class NotDominate{ + public @Stable int v; + public static int[] array = new int[100]; + public static NotDominate testCompile(int n) { + if ((n % 2) == 0) return null; + // add a loop here, trigger PhaseIdealLoop::verify_dominance + for (int i = 0; i < 100; i++) { + array[i] = n; + } + NotDominate nm = new NotDominate(); + nm.v = n; + return nm; + } + + public static void test() throws Exception { + for (int i = 0; i < 1000000; i++) + testCompile(i); + } + } + + public static void run(Class test) { + Throwable ex = null; + System.out.print(test.getName()+": "); + try { + test.getMethod("test").invoke(null); + } catch (InvocationTargetException e) { + ex = e.getCause(); + } catch (Throwable e) { + ex = e; + } finally { + if (ex == null) { + System.out.println("PASSED"); + } else { + System.out.println("FAILED"); + ex.printStackTrace(System.out); + } + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/CompLevelsTest.java --- a/hotspot/test/compiler/tiered/CompLevelsTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/CompLevelsTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -26,6 +26,9 @@ * * @author igor.ignatyev@oracle.com */ + +import compiler.whitebox.CompilerWhiteBoxTest; + public abstract class CompLevelsTest extends CompilerWhiteBoxTest { protected CompLevelsTest(TestCase testCase) { super(testCase); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java --- a/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -23,10 +23,11 @@ import java.lang.reflect.Executable; import java.util.concurrent.Callable; +import compiler.whitebox.CompilerWhiteBoxTest; /** * @test ConstantGettersTransitionsTest - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * 
@modules java.base/sun.misc * java.management * @build TransitionsTestExecutor ConstantGettersTransitionsTest diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/LevelTransitionTest.java --- a/hotspot/test/compiler/tiered/LevelTransitionTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/LevelTransitionTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,10 +25,12 @@ import java.lang.reflect.Method; import java.util.Objects; import java.util.concurrent.Callable; +import compiler.whitebox.CompilerWhiteBoxTest; +import compiler.whitebox.SimpleTestCase; /** * @test LevelTransitionTest - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.base/sun.misc * java.management * @ignore 8067651 @@ -36,7 +38,7 @@ * @run main ClassFileInstaller sun.hotspot.WhiteBox sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm/timeout=240 -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI -XX:+TieredCompilation - * -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* + * -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* * -XX:CompileCommand=compileonly,ExtendedTestCase$CompileMethodHolder::* * TransitionsTestExecutor LevelTransitionTest * @summary Test the correctness of compilation level transitions for different methods diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/NonTieredLevelsTest.java --- a/hotspot/test/compiler/tiered/NonTieredLevelsTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/NonTieredLevelsTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -22,17 +22,18 @@ */ import java.util.function.IntPredicate; +import compiler.whitebox.CompilerWhiteBoxTest; /** * @test NonTieredLevelsTest - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.management * @build NonTieredLevelsTest * @run main 
ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm -Xbootclasspath/a:. -XX:-TieredCompilation * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* + * -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* * NonTieredLevelsTest * @summary Verify that only one level can be used * @author igor.ignatyev@oracle.com diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/TieredLevelsTest.java --- a/hotspot/test/compiler/tiered/TieredLevelsTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/TieredLevelsTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,16 +21,18 @@ * questions. */ +import compiler.whitebox.CompilerWhiteBoxTest; + /** * @test TieredLevelsTest - * @library /testlibrary /test/lib /compiler/whitebox + * @library /testlibrary /test/lib /compiler/whitebox / * @modules java.management * @build TieredLevelsTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm -Xbootclasspath/a:. 
-XX:+TieredCompilation * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* + * -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* * TieredLevelsTest * @summary Verify that all levels < 'TieredStopAtLevel' can be used * @author igor.ignatyev@oracle.com diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/tiered/TransitionsTestExecutor.java --- a/hotspot/test/compiler/tiered/TransitionsTestExecutor.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/tiered/TransitionsTestExecutor.java Tue Nov 24 10:30:23 2015 +0100 @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import compiler.whitebox.CompilerWhiteBoxTest; /** * Executes given test in a separate VM with enabled Tiered Compilation for diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/types/TestMeetIncompatibleInterfaceArrays.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/types/TestMeetIncompatibleInterfaceArrays.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,353 @@ +/* + * Copyright 2015 SAP AG. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8141551 + * @summary C2 cannot handle returns with incompatible interface arrays + * @modules java.base/jdk.internal.org.objectweb.asm + * java.base/sun.misc + * @library /testlibrary /../../test/lib + * @build sun.hotspot.WhiteBox + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm + * -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:CompileThreshold=1 + * -XX:-TieredCompilation + * -XX:CICompilerCount=1 + * -XX:+PrintCompilation + * -XX:+PrintInlining + * -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run + * -XX:CompileCommand=dontinline,TestMeetIncompatibleInterfaceArrays$Helper.createI2* + * -XX:CompileCommand=quiet + * TestMeetIncompatibleInterfaceArrays 0 + * @run main/othervm + * -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:CompileThreshold=1 + * -XX:-TieredCompilation + * -XX:CICompilerCount=1 + * -XX:+PrintCompilation + * -XX:+PrintInlining + * -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run + * -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2* + * -XX:CompileCommand=quiet + * TestMeetIncompatibleInterfaceArrays 1 + * @run main/othervm + * -Xbootclasspath/a:. 
+ * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:CompileThreshold=1 + * -XX:Tier0InvokeNotifyFreqLog=0 -XX:Tier2InvokeNotifyFreqLog=0 -XX:Tier3InvokeNotifyFreqLog=0 -XX:Tier23InlineeNotifyFreqLog=0 + * -XX:Tier3InvocationThreshold=2 -XX:Tier3MinInvocationThreshold=2 -XX:Tier3CompileThreshold=2 + * -XX:Tier4InvocationThreshold=1 -XX:Tier4MinInvocationThreshold=1 -XX:Tier4CompileThreshold=1 + * -XX:+TieredCompilation + * -XX:CICompilerCount=2 + * -XX:+PrintCompilation + * -XX:+PrintInlining + * -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run + * -XX:CompileCommand=compileonly,TestMeetIncompatibleInterfaceArrays$Helper.createI2* + * -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2* + * -XX:CompileCommand=quiet + * TestMeetIncompatibleInterfaceArrays 2 + * + * @author volker.simonis@gmail.com + */ + +import java.io.FileOutputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import jdk.internal.org.objectweb.asm.ClassWriter; +import jdk.internal.org.objectweb.asm.MethodVisitor; +import static jdk.internal.org.objectweb.asm.Opcodes.*; +import sun.hotspot.WhiteBox; + +public class TestMeetIncompatibleInterfaceArrays extends ClassLoader { + + private static final WhiteBox WB = WhiteBox.getWhiteBox(); + + public static interface I1 { public String getName(); } + public static interface I2 { public String getName(); } + public static class I2C implements I2 { public String getName() { return "I2";} } + public static class I21C implements I2, I1 { public String getName() { return "I2 and I1";} } + + public static class Helper { + public static I2 createI2Array0() { + return new I2C(); + } + public static I2[] createI2Array1() { + return new I2C[] { new I2C() }; + } + public static I2[][] createI2Array2() { + return new I2C[][] { new I2C[] { new I2C() } }; + } + public static I2[][][] createI2Array3() { + return new I2C[][][] { new I2C[][] { new I2C[] { new 
I2C() } } }; + } + public static I2[][][][] createI2Array4() { + return new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } }; + } + public static I2[][][][][] createI2Array5() { + return new I2C[][][][][] { new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } } }; + } + public static I2 createI21Array0() { + return new I21C(); + } + public static I2[] createI21Array1() { + return new I21C[] { new I21C() }; + } + public static I2[][] createI21Array2() { + return new I21C[][] { new I21C[] { new I21C() } }; + } + public static I2[][][] createI21Array3() { + return new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } }; + } + public static I2[][][][] createI21Array4() { + return new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } }; + } + public static I2[][][][][] createI21Array5() { + return new I21C[][][][][] { new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } } }; + } + } + + // Location for the generated class files + public static final String PATH = System.getProperty("test.classes", ".") + java.io.File.separator; + + /* + * With 'good == false' this helper method creates the following classes + * (using the nested 'Helper' class and the nested interfaces 'I1' and 'I2'). + * For brevity I omit the enclosing class 'TestMeetIncompatibleInterfaceArrays' in the + * following examples: + * + * public class MeetIncompatibleInterfaceArrays0ASM { + * public static I1 run() { + * return Helper.createI2Array0(); // returns I2 + * } + * public static void test() { + * I1 i1 = run(); + * System.out.println(i1.getName()); + * } + * } + * public class MeetIncompatibleInterfaceArrays1ASM { + * public static I1[] run() { + * return Helper.createI2Array1(); // returns I2[] + * } + * public static void test() { + * I1[] i1 = run(); + * System.out.println(i1[0].getName()); + * } + * } + * ... 
+ * // MeetIncompatibleInterfaceArrays4ASM is special because it creates + * // an illegal class which will be rejected by the verifier. + * public class MeetIncompatibleInterfaceArrays4ASM { + * public static I1[][][][] run() { + * return Helper.createI2Array3(); // returns I2[][][] which gives a verifier error because return expects I1[][][][] + * } + * public static void test() { + * I1[][][][] i1 = run(); + * System.out.println(i1[0][0][0][0].getName()); + * } + * ... + * public class MeetIncompatibleInterfaceArrays5ASM { + * public static I1[][][][][] run() { + * return Helper.createI2Array5(); // returns I2[][][][][] + * } + * public static void test() { + * I1[][][][][] i1 = run(); + * System.out.println(i1[0][0][0][0][0].getName()); + * } + * } + * + * Notice that this is not legal Java code. We would have to use a cast in "run()" to make it legal: + * + * public static I1[] run() { + * return (I1[])Helper.createI2Array1(); // returns I2[] + * } + * + * But in pure bytecode, the "run()" methods are perfectly legal: + * + * public static I1[] run(); + * Code: + * 0: invokestatic #16 // Method Helper.createI2Array1:()[LI2; + * 3: areturn + * + * The "test()" method calls the "getName()" function from I1 on the objects returned by "run()". + * This will expectedly fail with an "IncompatibleClassChangeError" because the objects returned + * by "run()" (and by createI2Array()) are actually of type "I2C" and only implement "I2" but not "I1". 
+ * + * + * With 'good == true' this helper method will create the following classes: + * + * public class MeetIncompatibleInterfaceArraysGood0ASM { + * public static I1 run() { + * return Helper.createI21Array0(); // returns I2 + * } + * public static void test() { + * I1 i1 = run(); + * System.out.println(i1.getName()); + * } + * } + * + * Calling "test()" on these objects will succeed and output "I2 and I1" because now the "run()" + * method calls "createI21Array()" which actually returns an object (or an array of objects) of + * type "I21C" which implements both "I2" and "I1". + * + * Notice that at the bytecode level, the code for the "run()" and "test()" methods in + * "MeetIncompatibleInterfaceArraysASM" and "MeetIncompatibleInterfaceArraysGoodASM" looks exactly + * the same. I.e. the verifier has no chance to verify if the I2 object returned by "createI2Array()" + * or "createI21Array()" implements "I1" or not. That's actually the reason why both versions of + * generated classes are legal from a verifier point of view. + * + */ + static void generateTestClass(int dim, boolean good) throws Exception { + String baseClassName = "MeetIncompatibleInterfaceArrays"; + if (good) + baseClassName += "Good"; + String createName = "createI2" + (good ? 
"1" : "") + "Array"; + String a = ""; + for (int i = 0; i < dim; i++) + a += "["; + ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES); + cw.visit(V1_8, ACC_PUBLIC, baseClassName + dim + "ASM", null, "java/lang/Object", null); + MethodVisitor constr = cw.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null); + constr.visitCode(); + constr.visitVarInsn(ALOAD, 0); + constr.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false); + constr.visitInsn(RETURN); + constr.visitMaxs(0, 0); + constr.visitEnd(); + MethodVisitor run = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "run", + "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", null, null); + run.visitCode(); + if (dim == 4) { + run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + 3, + "()" + "[[[" + "LTestMeetIncompatibleInterfaceArrays$I2;", false); + } else { + run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + dim, + "()" + a + "LTestMeetIncompatibleInterfaceArrays$I2;", false); + } + run.visitInsn(ARETURN); + run.visitMaxs(0, 0); + run.visitEnd(); + MethodVisitor test = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "test", "()V", null, null); + test.visitCode(); + test.visitMethodInsn(INVOKESTATIC, baseClassName + dim + "ASM", "run", + "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", false); + test.visitVarInsn(ASTORE, 0); + if (dim > 0) { + test.visitVarInsn(ALOAD, 0); + for (int i = 1; i <= dim; i++) { + test.visitInsn(ICONST_0); + test.visitInsn(AALOAD); + } + test.visitVarInsn(ASTORE, 1); + } + test.visitFieldInsn(GETSTATIC, "java/lang/System", "out", "Ljava/io/PrintStream;"); + test.visitVarInsn(ALOAD, dim > 0 ? 
1 : 0); + test.visitMethodInsn(INVOKEINTERFACE, "TestMeetIncompatibleInterfaceArrays$I1", "getName", + "()Ljava/lang/String;", true); + test.visitMethodInsn(INVOKEVIRTUAL, "java/io/PrintStream", "println", "(Ljava/lang/Object;)V", false); + test.visitInsn(RETURN); + test.visitMaxs(0, 0); + test.visitEnd(); + + // Get the bytes of the class.. + byte[] b = cw.toByteArray(); + // ..and write them into a class file (for debugging) + FileOutputStream fos = new FileOutputStream(PATH + baseClassName + dim + "ASM.class"); + fos.write(b); + fos.close(); + + } + + public static String[][] tier = { { "interpreted", "C2 (tier 4) without inlining", "C2 (tier4) without inlining" }, + { "interpreted", "C2 (tier 4) with inlining", "C2 (tier4) with inlining" }, + { "interpreted", "C1 (tier 3) with inlining", "C2 (tier4) with inlining" } }; + + public static void main(String[] args) throws Exception { + final int pass = Integer.parseInt(args.length > 0 ? args[0] : "0"); + + // Load and initialize some classes required for compilation + Class.forName("TestMeetIncompatibleInterfaceArrays$I1"); + Class.forName("TestMeetIncompatibleInterfaceArrays$I2"); + Class.forName("TestMeetIncompatibleInterfaceArrays$Helper"); + + for (int g = 0; g < 2; g++) { + String baseClassName = "MeetIncompatibleInterfaceArrays"; + boolean good = (g == 0) ? 
false : true; + if (good) + baseClassName += "Good"; + for (int i = 0; i < 6; i++) { + System.out.println(); + System.out.println("Creating " + baseClassName + i + "ASM.class"); + System.out.println("========================================" + "=" + "========="); + // Create the "MeetIncompatibleInterfaceArraysASM" class + generateTestClass(i, good); + Class c = null; + try { + c = Class.forName(baseClassName + i + "ASM"); + } catch (VerifyError ve) { + if (i == 4) { + System.out.println("OK - must be (" + ve.getMessage() + ")."); + } else { + throw ve; + } + continue; + } + // Call MeetIncompatibleInterfaceArraysASM.test() + Method m = c.getMethod("test"); + Method r = c.getMethod("run"); + for (int j = 0; j < 3; j++) { + System.out.println((j + 1) + ". invokation of " + baseClassName + i + "ASM.test() [should be " + + tier[pass][j] + "]"); + try { + m.invoke(null); + } catch (InvocationTargetException ite) { + if (good) { + throw ite; + } else { + if (ite.getCause() instanceof IncompatibleClassChangeError) { + System.out.println(" OK - catched InvocationTargetException(" + + ite.getCause().getMessage() + ")."); + } else { + throw ite; + } + } + } + } + System.out.println("Method " + r + (WB.isMethodCompiled(r) ? 
" has" : " has not") + " been compiled."); + if (!WB.isMethodCompiled(r)) { + throw new Exception("Method " + r + " must be compiled!"); + } + } + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/ClearMethodStateTest.java --- a/hotspot/test/compiler/whitebox/ClearMethodStateTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/ClearMethodStateTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -22,16 +22,17 @@ */ import java.util.function.Function; +import compiler.whitebox.CompilerWhiteBoxTest; /* * @test ClearMethodStateTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build ClearMethodStateTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* ClearMethodStateTest + * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* ClearMethodStateTest * @summary testing of WB::clearMethodState() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java --- a/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -20,13 +20,11 @@ * or visit www.oracle.com if you need additional information or have any * questions. 
*/ +package compiler.whitebox; import sun.hotspot.WhiteBox; import sun.hotspot.code.NMethod; - -import java.lang.reflect.Constructor; import java.lang.reflect.Executable; -import java.lang.reflect.Method; import java.util.Objects; import java.util.concurrent.Callable; import java.util.function.Function; @@ -38,19 +36,19 @@ */ public abstract class CompilerWhiteBoxTest { /** {@code CompLevel::CompLevel_none} -- Interpreter */ - protected static final int COMP_LEVEL_NONE = 0; + public static final int COMP_LEVEL_NONE = 0; /** {@code CompLevel::CompLevel_any}, {@code CompLevel::CompLevel_all} */ - protected static final int COMP_LEVEL_ANY = -1; + public static final int COMP_LEVEL_ANY = -1; /** {@code CompLevel::CompLevel_simple} -- C1 */ - protected static final int COMP_LEVEL_SIMPLE = 1; + public static final int COMP_LEVEL_SIMPLE = 1; /** {@code CompLevel::CompLevel_limited_profile} -- C1, invocation & backedge counters */ - protected static final int COMP_LEVEL_LIMITED_PROFILE = 2; + public static final int COMP_LEVEL_LIMITED_PROFILE = 2; /** {@code CompLevel::CompLevel_full_profile} -- C1, invocation & backedge counters + mdo */ - protected static final int COMP_LEVEL_FULL_PROFILE = 3; + public static final int COMP_LEVEL_FULL_PROFILE = 3; /** {@code CompLevel::CompLevel_full_optimization} -- C2 or Shark */ - protected static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; + public static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; /** Maximal value for CompLevel */ - protected static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION; + public static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION; /** Instance of WhiteBox */ protected static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); @@ -70,7 +68,7 @@ protected static final boolean IS_VERBOSE = System.getProperty("verbose") != null; /** invocation count to trigger compilation */ - protected static final int THRESHOLD; + public static final int THRESHOLD; /** invocation count to trigger OSR 
compilation */ protected static final long BACKEDGE_THRESHOLD; /** Value of {@code java.vm.info} (interpreted|mixed|comp mode) */ @@ -312,7 +310,7 @@ * * @param executable Executable */ - protected static final void waitBackgroundCompilation(Executable executable) { + public static final void waitBackgroundCompilation(Executable executable) { if (!BACKGROUND_COMPILATION) { return; } @@ -441,7 +439,7 @@ * @return {@code true} if the test should be skipped, * {@code false} otherwise */ - protected static boolean skipOnTieredCompilation(boolean value) { + public static boolean skipOnTieredCompilation(boolean value) { if (value == CompilerWhiteBoxTest.TIERED_COMPILATION) { System.err.println("Test isn't applicable w/ " + (value ? "enabled" : "disabled") @@ -452,256 +450,3 @@ } } -enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase { - /** constructor test case */ - CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false), - /** method test case */ - METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false), - /** static method test case */ - STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false), - /** OSR constructor test case */ - OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR, - Helper.OSR_CONSTRUCTOR_CALLABLE, true), - /** OSR method test case */ - OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true), - /** OSR static method test case */ - OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true); - - private final Executable executable; - private final Callable callable; - private final boolean isOsr; - - private SimpleTestCase(Executable executable, Callable callable, - boolean isOsr) { - this.executable = executable; - this.callable = callable; - this.isOsr = isOsr; - } - - @Override - public Executable getExecutable() { - return executable; - } - - @Override - public Callable getCallable() { - return callable; - } - - @Override - public boolean isOsr() { - return isOsr; - } - - private static class Helper { - 
- private static final Callable CONSTRUCTOR_CALLABLE - = new Callable() { - @Override - public Integer call() throws Exception { - return new Helper(1337).hashCode(); - } - }; - - private static final Callable METHOD_CALLABLE - = new Callable() { - private final Helper helper = new Helper(); - - @Override - public Integer call() throws Exception { - return helper.method(); - } - }; - - private static final Callable STATIC_CALLABLE - = new Callable() { - @Override - public Integer call() throws Exception { - return staticMethod(); - } - }; - - private static final Callable OSR_CONSTRUCTOR_CALLABLE - = new Callable() { - @Override - public Integer call() throws Exception { - return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode(); - } - }; - - private static final Callable OSR_METHOD_CALLABLE - = new Callable() { - private final Helper helper = new Helper(); - - @Override - public Integer call() throws Exception { - return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD); - } - }; - - private static final Callable OSR_STATIC_CALLABLE - = new Callable() { - @Override - public Integer call() throws Exception { - return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD); - } - }; - - private static final Constructor CONSTRUCTOR; - private static final Constructor OSR_CONSTRUCTOR; - private static final Method METHOD; - private static final Method STATIC; - private static final Method OSR_METHOD; - private static final Method OSR_STATIC; - - static { - try { - CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class); - } catch (NoSuchMethodException | SecurityException e) { - throw new RuntimeException( - "exception on getting method Helper.(int)", e); - } - try { - OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor( - Object.class, long.class); - } catch (NoSuchMethodException | SecurityException e) { - throw new RuntimeException( - "exception on getting method Helper.(Object, long)", e); - } - METHOD = getMethod("method"); - 
STATIC = getMethod("staticMethod"); - OSR_METHOD = getMethod("osrMethod", long.class); - OSR_STATIC = getMethod("osrStaticMethod", long.class); - } - - private static Method getMethod(String name, Class... parameterTypes) { - try { - return Helper.class.getDeclaredMethod(name, parameterTypes); - } catch (NoSuchMethodException | SecurityException e) { - throw new RuntimeException( - "exception on getting method Helper." + name, e); - } - } - - private static int staticMethod() { - return 1138; - } - - private int method() { - return 42; - } - - /** - * Deoptimizes all non-osr versions of the given executable after - * compilation finished. - * - * @param e Executable - * @throws Exception - */ - private static void waitAndDeoptimize(Executable e) { - CompilerWhiteBoxTest.waitBackgroundCompilation(e); - if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) { - throw new RuntimeException(e + " must not be in queue"); - } - // Deoptimize non-osr versions of executable - WhiteBox.getWhiteBox().deoptimizeMethod(e, false); - } - - /** - * Executes the method multiple times to make sure we have - * enough profiling information before triggering an OSR - * compilation. Otherwise the C2 compiler may add uncommon traps. - * - * @param m Method to be executed - * @return Number of times the method was executed - * @throws Exception - */ - private static int warmup(Method m) throws Exception { - waitAndDeoptimize(m); - Helper helper = new Helper(); - int result = 0; - for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) { - result += (int)m.invoke(helper, 1); - } - // Wait to make sure OSR compilation is not blocked by - // non-OSR compilation in the compile queue - CompilerWhiteBoxTest.waitBackgroundCompilation(m); - return result; - } - - /** - * Executes the constructor multiple times to make sure we - * have enough profiling information before triggering an OSR - * compilation. Otherwise the C2 compiler may add uncommon traps. 
- * - * @param c Constructor to be executed - * @return Number of times the constructor was executed - * @throws Exception - */ - private static int warmup(Constructor c) throws Exception { - waitAndDeoptimize(c); - int result = 0; - for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) { - result += c.newInstance(null, 1).hashCode(); - } - // Wait to make sure OSR compilation is not blocked by - // non-OSR compilation in the compile queue - CompilerWhiteBoxTest.waitBackgroundCompilation(c); - return result; - } - - private static int osrStaticMethod(long limit) throws Exception { - int result = 0; - if (limit != 1) { - result = warmup(OSR_STATIC); - } - // Trigger osr compilation - for (long i = 0; i < limit; ++i) { - result += staticMethod(); - } - return result; - } - - private int osrMethod(long limit) throws Exception { - int result = 0; - if (limit != 1) { - result = warmup(OSR_METHOD); - } - // Trigger osr compilation - for (long i = 0; i < limit; ++i) { - result += method(); - } - return result; - } - - private final int x; - - // for method and OSR method test case - public Helper() { - x = 0; - } - - // for OSR constructor test case - private Helper(Object o, long limit) throws Exception { - int result = 0; - if (limit != 1) { - result = warmup(OSR_CONSTRUCTOR); - } - // Trigger osr compilation - for (long i = 0; i < limit; ++i) { - result += method(); - } - x = result; - } - - // for constructor test case - private Helper(int x) { - this.x = x; - } - - @Override - public int hashCode() { - return x; - } - } -} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/DeoptimizeAllTest.java --- a/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. 
*/ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test DeoptimizeAllTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build DeoptimizeAllTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeAllTest + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeAllTest * @summary testing of WB::deoptimizeAll() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java --- a/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,10 +21,12 @@ * questions. */ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test DeoptimizeFramesTest * @bug 8028595 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build DeoptimizeFramesTest * @run main ClassFileInstaller sun.hotspot.WhiteBox diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java --- a/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. 
*/ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test DeoptimizeMethodTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build DeoptimizeMethodTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeMethodTest + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeMethodTest * @summary testing of WB::deoptimizeMethod() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java --- a/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,11 +24,12 @@ import sun.hotspot.WhiteBox; import java.lang.reflect.Executable; import java.lang.reflect.Method; +import compiler.whitebox.CompilerWhiteBoxTest; /* * @test DeoptimizeMultipleOSRTest * @bug 8061817 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build DeoptimizeMultipleOSRTest * @run main ClassFileInstaller sun.hotspot.WhiteBox diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java --- a/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. 
*/ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test EnqueueMethodForCompilationTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build EnqueueMethodForCompilationTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* EnqueueMethodForCompilationTest + * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* EnqueueMethodForCompilationTest * @summary testing of WB::enqueueMethodForCompilation() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java --- a/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -30,18 +30,19 @@ import jdk.test.lib.Asserts; import jdk.test.lib.InfiniteLoop; +import compiler.whitebox.CompilerWhiteBoxTest; /* * @test * @bug 8059624 8064669 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build ForceNMethodSweepTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:-TieredCompilation -XX:+WhiteBoxAPI - * -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* + * -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* * -XX:-BackgroundCompilation ForceNMethodSweepTest * @summary testing of WB::forceNMethodSweep */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/GetNMethodTest.java --- a/hotspot/test/compiler/whitebox/GetNMethodTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/GetNMethodTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -25,16 +25,17 @@ import sun.hotspot.code.BlobType; import sun.hotspot.code.NMethod; import jdk.test.lib.Asserts; +import compiler.whitebox.CompilerWhiteBoxTest; /* * @test GetNMethodTest * @bug 8038240 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build GetNMethodTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* GetNMethodTest + * @run main/othervm -Xbootclasspath/a:. 
-Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* GetNMethodTest * @summary testing of WB::getNMethod() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/IsMethodCompilableTest.java --- a/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,7 +24,7 @@ /* * @test IsMethodCompilableTest * @bug 8007270 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.base/sun.misc * java.management * @build jdk.test.lib.* sun.hotspot.WhiteBox @@ -32,12 +32,13 @@ * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main ClassFileInstaller jdk.test.lib.Platform - * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* IsMethodCompilableTest + * @run main/othervm/timeout=2400 -Xbootclasspath/a:. 
-Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* IsMethodCompilableTest * @summary testing of WB::isMethodCompilable() * @author igor.ignatyev@oracle.com */ import jdk.test.lib.Platform; +import compiler.whitebox.CompilerWhiteBoxTest; public class IsMethodCompilableTest extends CompilerWhiteBoxTest { /** diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/LockCompilationTest.java --- a/hotspot/test/compiler/whitebox/LockCompilationTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/LockCompilationTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -24,12 +24,12 @@ /* * @test LockCompilationTest * @bug 8059624 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build LockCompilationTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* LockCompilationTest + * @run main/othervm/timeout=600 -Xbootclasspath/a:. 
-Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* LockCompilationTest * @summary testing of WB::lock/unlockCompilation() */ @@ -37,7 +37,7 @@ import java.io.PrintWriter; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; - +import compiler.whitebox.CompilerWhiteBoxTest; import jdk.test.lib.Asserts; public class LockCompilationTest extends CompilerWhiteBoxTest { diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java --- a/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. */ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test MakeMethodNotCompilableTest * @bug 8012322 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build MakeMethodNotCompilableTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* MakeMethodNotCompilableTest + * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* MakeMethodNotCompilableTest * @summary testing of WB::makeMethodNotCompilable() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java --- a/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. 
*/ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test SetDontInlineMethodTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build SetDontInlineMethodTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetDontInlineMethodTest + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetDontInlineMethodTest * @summary testing of WB::testSetDontInlineMethod() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java --- a/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java Tue Nov 24 10:30:23 2015 +0100 @@ -21,15 +21,17 @@ * questions. */ +import compiler.whitebox.CompilerWhiteBoxTest; + /* * @test SetForceInlineMethodTest * @bug 8006683 8007288 8022832 - * @library /testlibrary /test/lib + * @library /testlibrary /test/lib / * @modules java.management * @build SetForceInlineMethodTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetForceInlineMethodTest + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetForceInlineMethodTest * @summary testing of WB::testSetForceInlineMethod() * @author igor.ignatyev@oracle.com */ diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/compiler/whitebox/SimpleTestCase.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/whitebox/SimpleTestCase.java Tue Nov 24 10:30:23 2015 +0100 @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package compiler.whitebox; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Executable; +import java.lang.reflect.Method; +import java.util.concurrent.Callable; +import sun.hotspot.WhiteBox; + +public enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase { + /** constructor test case */ + CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false), + /** method test case */ + METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false), + /** static method test case */ + STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false), + /** OSR constructor test case */ + OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR, + Helper.OSR_CONSTRUCTOR_CALLABLE, true), + /** OSR method test case */ + OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true), + /** OSR static method test case */ + OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true); + + private final Executable executable; + private final Callable callable; + private final boolean isOsr; + + private SimpleTestCase(Executable executable, Callable callable, + boolean isOsr) { + this.executable = executable; + this.callable = callable; + this.isOsr = isOsr; + } + + @Override + public Executable getExecutable() { + return executable; + } + + @Override + public Callable getCallable() { + return callable; + } + + @Override + public boolean isOsr() { + return isOsr; + } + + private static class Helper { + + private static final Callable CONSTRUCTOR_CALLABLE + = new Callable() { + @Override + public Integer call() throws Exception { + return new Helper(1337).hashCode(); + } + }; + + private static final Callable METHOD_CALLABLE + = new Callable() { + private final Helper helper = new Helper(); + + @Override + public Integer call() throws Exception { + return helper.method(); + } + }; + + private static final Callable STATIC_CALLABLE + = new Callable() { + @Override + public Integer call() throws Exception { + return staticMethod(); + } + }; + + private 
static final Callable OSR_CONSTRUCTOR_CALLABLE + = new Callable() { + @Override + public Integer call() throws Exception { + return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode(); + } + }; + + private static final Callable OSR_METHOD_CALLABLE + = new Callable() { + private final Helper helper = new Helper(); + + @Override + public Integer call() throws Exception { + return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD); + } + }; + + private static final Callable OSR_STATIC_CALLABLE + = new Callable() { + @Override + public Integer call() throws Exception { + return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD); + } + }; + + private static final Constructor CONSTRUCTOR; + private static final Constructor OSR_CONSTRUCTOR; + private static final Method METHOD; + private static final Method STATIC; + private static final Method OSR_METHOD; + private static final Method OSR_STATIC; + + static { + try { + CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class); + } catch (NoSuchMethodException | SecurityException e) { + throw new RuntimeException( + "exception on getting method Helper.(int)", e); + } + try { + OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor( + Object.class, long.class); + } catch (NoSuchMethodException | SecurityException e) { + throw new RuntimeException( + "exception on getting method Helper.(Object, long)", e); + } + METHOD = getMethod("method"); + STATIC = getMethod("staticMethod"); + OSR_METHOD = getMethod("osrMethod", long.class); + OSR_STATIC = getMethod("osrStaticMethod", long.class); + } + + private static Method getMethod(String name, Class... parameterTypes) { + try { + return Helper.class.getDeclaredMethod(name, parameterTypes); + } catch (NoSuchMethodException | SecurityException e) { + throw new RuntimeException( + "exception on getting method Helper." 
+ name, e); + } + } + + private static int staticMethod() { + return 1138; + } + + private int method() { + return 42; + } + + /** + * Deoptimizes all non-osr versions of the given executable after + * compilation finished. + * + * @param e Executable + * @throws Exception + */ + private static void waitAndDeoptimize(Executable e) { + CompilerWhiteBoxTest.waitBackgroundCompilation(e); + if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) { + throw new RuntimeException(e + " must not be in queue"); + } + // Deoptimize non-osr versions of executable + WhiteBox.getWhiteBox().deoptimizeMethod(e, false); + } + + /** + * Executes the method multiple times to make sure we have + * enough profiling information before triggering an OSR + * compilation. Otherwise the C2 compiler may add uncommon traps. + * + * @param m Method to be executed + * @return Number of times the method was executed + * @throws Exception + */ + private static int warmup(Method m) throws Exception { + waitAndDeoptimize(m); + Helper helper = new Helper(); + int result = 0; + for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) { + result += (int)m.invoke(helper, 1); + } + // Wait to make sure OSR compilation is not blocked by + // non-OSR compilation in the compile queue + CompilerWhiteBoxTest.waitBackgroundCompilation(m); + return result; + } + + /** + * Executes the constructor multiple times to make sure we + * have enough profiling information before triggering an OSR + * compilation. Otherwise the C2 compiler may add uncommon traps. 
+ * + * @param c Constructor to be executed + * @return Number of times the constructor was executed + * @throws Exception + */ + private static int warmup(Constructor c) throws Exception { + waitAndDeoptimize(c); + int result = 0; + for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) { + result += c.newInstance(null, 1).hashCode(); + } + // Wait to make sure OSR compilation is not blocked by + // non-OSR compilation in the compile queue + CompilerWhiteBoxTest.waitBackgroundCompilation(c); + return result; + } + + private static int osrStaticMethod(long limit) throws Exception { + int result = 0; + if (limit != 1) { + result = warmup(OSR_STATIC); + } + // Trigger osr compilation + for (long i = 0; i < limit; ++i) { + result += staticMethod(); + } + return result; + } + + private int osrMethod(long limit) throws Exception { + int result = 0; + if (limit != 1) { + result = warmup(OSR_METHOD); + } + // Trigger osr compilation + for (long i = 0; i < limit; ++i) { + result += method(); + } + return result; + } + + private final int x; + + // for method and OSR method test case + public Helper() { + x = 0; + } + + // for OSR constructor test case + private Helper(Object o, long limit) throws Exception { + int result = 0; + if (limit != 1) { + result = warmup(OSR_CONSTRUCTOR); + } + // Trigger osr compilation + for (long i = 0; i < limit; ++i) { + result += method(); + } + x = result; + } + + // for constructor test case + private Helper(int x) { + this.x = x; + } + + @Override + public int hashCode() { + return x; + } + } +} diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java --- a/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java Tue Nov 24 10:30:23 2015 +0100 @@ -59,6 +59,13 @@ allOptionsAsMap.remove("ThreadStackSize"); /* + * Remove the flag controlling the 
size of the stack because the + * flag has direct influence on the physical memory usage of + * the VM. + */ + allOptionsAsMap.remove("CompilerThreadStackSize"); + + /* * Exclude MallocMaxTestWords as it is expected to exit VM at small values (>=0) */ allOptionsAsMap.remove("MallocMaxTestWords"); diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/serviceability/dcmd/compiler/control2.txt --- a/hotspot/test/serviceability/dcmd/compiler/control2.txt Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/serviceability/dcmd/compiler/control2.txt Tue Nov 24 10:30:23 2015 +0100 @@ -11,7 +11,7 @@ }, inline : [ "+javax/util.*", "-comx/sun.*"], PrintAssembly: false, - IGVPrintLevel: 2 + MaxNodeLimit: 80001 }, { match: ["baz.*","frob.*"], diff -r 6efbc7ffd767 -r d25c2fc1e248 hotspot/test/testlibrary/jdk/test/lib/Utils.java --- a/hotspot/test/testlibrary/jdk/test/lib/Utils.java Tue Nov 17 16:40:52 2015 -0500 +++ b/hotspot/test/testlibrary/jdk/test/lib/Utils.java Tue Nov 24 10:30:23 2015 +0100 @@ -44,6 +44,7 @@ import java.util.Map; import java.util.HashMap; import java.util.List; +import java.util.Objects; import java.util.Random; import java.util.function.BooleanSupplier; import java.util.concurrent.TimeUnit; @@ -82,6 +83,16 @@ */ public static final String TEST_SRC = System.getProperty("test.src", ".").trim(); + /* + * Returns the value of 'test.jdk' system property + */ + public static final String TEST_JDK = System.getProperty("test.jdk"); + + /** + * Returns the value of 'test.classes' system property + */ + public static final String TEST_CLASSES = System.getProperty("test.classes", "."); + private static Unsafe unsafe = null; /** @@ -616,5 +627,18 @@ NULL_VALUES.put(float.class, 0.0f); NULL_VALUES.put(double.class, 0.0d); } + + /** + * Returns mandatory property value + * @param propName is a name of property to request + * @return a String with requested property value + */ + public static String getMandatoryProperty(String propName) { + Objects.requireNonNull(propName, 
"Requested null property"); + String prop = System.getProperty(propName); + Objects.requireNonNull(prop, + String.format("A mandatory property '%s' isn't set", propName)); + return prop; + } }