jdk-sandbox: changeset 34211:d25c2fc1e248

--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -1079,10 +1079,10 @@
   // and for a volatile write we need
   //
   //   stlr<x>
-  // 
+  //
   // Alternatively, we can implement them by pairing a normal
   // load/store with a memory barrier. For a volatile read we need
-  // 
+  //
   //   ldr<x>
   //   dmb ishld
   //
@@ -1240,7 +1240,7 @@
   // Alternatively, we can elide generation of the dmb instructions
   // and plant the alternative CompareAndSwap macro-instruction
   // sequence (which uses ldaxr<x>).
-  // 
+  //
   // Of course, the above only applies when we see these signature
   // configurations. We still want to plant dmb instructions in any
   // other cases where we may see a MemBarAcquire, MemBarRelease or
@@ -1367,7 +1367,7 @@
     opcode = parent->Opcode();
     return opcode == Op_MemBarRelease;
   }
- 
+
   // 2) card mark detection helper
 
   // helper predicate which can be used to detect a volatile membar
@@ -1383,7 +1383,7 @@
   // true
   //
   // iii) the node's Mem projection feeds a StoreCM node.
-  
+
   bool is_card_mark_membar(const MemBarNode *barrier)
   {
     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
@@ -1402,7 +1402,7 @@
 	return true;
       }
     }
-  
+
     return false;
   }
 
@@ -1430,7 +1430,7 @@
   // where
   //  || and \\ represent Ctl and Mem feeds via Proj nodes
   //  | \ and / indicate further routing of the Ctl and Mem feeds
-  // 
+  //
   // this is the graph we see for non-object stores. however, for a
   // volatile Object store (StoreN/P) we may see other nodes below the
   // leading membar because of the need for a GC pre- or post-write
@@ -1592,7 +1592,7 @@
   // ordering but neither will a releasing store (stlr). The latter
   // guarantees that the object put is visible but does not guarantee
   // that writes by other threads have also been observed.
-  // 
+  //
   // So, returning to the task of translating the object put and the
   // leading/trailing membar nodes: what do the non-normal node graph
   // look like for these 2 special cases? and how can we determine the
@@ -1731,7 +1731,7 @@
   //       |         |         |            |
   //     C |       M |       M |          M |
   //        \        |         |           /
-  //                  . . . 
+  //                  . . .
   //          (post write subtree elided)
   //                    . . .
   //             C \         M /
@@ -1812,12 +1812,12 @@
   //   |    |                 |  /        /
   //   |  Region  . . .     Phi[M]  _____/
   //   |    /                 |    /
-  //   |                      |   /   
+  //   |                      |   /
   //   | . . .   . . .        |  /
   //   | /                    | /
   // Region           |  |  Phi[M]
   //   |              |  |  / Bot
-  //    \            MergeMem 
+  //    \            MergeMem
   //     \            /
   //     MemBarVolatile
   //
@@ -1858,7 +1858,7 @@
   // to a trailing barrier via a MergeMem. That feed is either direct
   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
   // memory flow (for G1).
-  // 
+  //
   // The predicates controlling generation of instructions for store
   // and barrier nodes employ a few simple helper functions (described
   // below) which identify the presence or absence of all these
@@ -2112,8 +2112,8 @@
       x = x->in(MemNode::Memory);
     } else {
       // the merge should get its Bottom mem feed from the leading membar
-      x = mm->in(Compile::AliasIdxBot);      
-    } 
+      x = mm->in(Compile::AliasIdxBot);
+    }
 
     // ensure this is a non control projection
     if (!x->is_Proj() || x->is_CFG()) {
@@ -2190,12 +2190,12 @@
   //     . . .
   //       |
   //   MemBarVolatile (card mark)
-  //      |          |     
+  //      |          |
   //      |        StoreCM
   //      |          |
   //      |        . . .
-  //  Bot |  / 
-  //   MergeMem 
+  //  Bot |  /
+  //   MergeMem
   //      |
   //      |
   //    MemBarVolatile {trailing}
@@ -2203,10 +2203,10 @@
   // 2)
   //   MemBarRelease/CPUOrder (leading)
   //    |
-  //    | 
+  //    |
   //    |\       . . .
-  //    | \        | 
-  //    |  \  MemBarVolatile (card mark) 
+  //    | \        |
+  //    |  \  MemBarVolatile (card mark)
   //    |   \   |     |
   //     \   \  |   StoreCM    . . .
   //      \   \ |
@@ -2231,7 +2231,7 @@
   //    |  \   \  |   StoreCM    . . .
   //    |   \   \ |
   //     \   \  Phi
-  //      \   \ /  
+  //      \   \ /
   //       \  Phi
   //        \ /
   //        Phi  . . .
@@ -2506,7 +2506,7 @@
 
     return (x->is_Load() && x->as_Load()->is_acquire());
   }
-  
+
   // now check for an unsafe volatile get
 
   // need to check for
@@ -2644,7 +2644,7 @@
   }
 
   membar = child_membar(membar);
-  
+
   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
     return false;
   }
@@ -2703,7 +2703,7 @@
 
   // first we check if this is part of a card mark. if so then we have
   // to generate a StoreLoad barrier
-  
+
   if (is_card_mark_membar(mbvol)) {
       return false;
   }
@@ -2769,7 +2769,7 @@
   if (!is_card_mark_membar(mbvol)) {
     return true;
   }
-  
+
   // we found a card mark -- just make sure we have a trailing barrier
 
   return (card_mark_to_trailing(mbvol) != NULL);
@@ -2808,7 +2808,7 @@
 
   assert(barrier->Opcode() == Op_MemBarCPUOrder,
 	 "CAS not fed by cpuorder membar!");
-      
+
   MemBarNode *b = parent_membar(barrier);
   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
 	  "CAS not fed by cpuorder+release membar pair!");
@@ -3463,6 +3463,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -4663,7 +4674,7 @@
       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -4671,7 +4682,7 @@
       // Emit stub for static call
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -4681,7 +4692,7 @@
     MacroAssembler _masm(&cbuf);
     address call = __ ic_call((address)$meth$$method);
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
   %}
@@ -4706,7 +4717,7 @@
     if (cb) {
       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
       if (call == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     } else {

--- a/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -41,7 +41,9 @@
 
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  ce->store_parameter(_method->as_register(), 1);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ mov_metadata(rscratch1, m);
+  ce->store_parameter(rscratch1, 1);
   ce->store_parameter(_bci, 0);
   __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);

--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -70,6 +70,7 @@
 LIR_Opr LIRGenerator::divOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
 LIR_Opr LIRGenerator::remOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
 LIR_Opr LIRGenerator::shiftCountOpr()   { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::r0_opr; }
 LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }

--- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -73,6 +73,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);

--- a/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_aarch64.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }

--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -91,20 +91,18 @@
       unsigned offset_lo = dest & 0xfff;
       offset = adr_page - pc_page;
 
-      // We handle 3 types of PC relative addressing
+      // We handle 4 types of PC relative addressing
       //   1 - adrp    Rx, target_page
       //       ldr/str Ry, [Rx, #offset_in_page]
       //   2 - adrp    Rx, target_page
       //       add     Ry, Rx, #offset_in_page
       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
-      // In the first 2 cases we must check that Rx is the same in the adrp and the
-      // subsequent ldr/str or add instruction. Otherwise we could accidentally end
-      // up treating a type 3 relocation as a type 1 or 2 just because it happened
-      // to be followed by a random unrelated ldr/str or add instruction.
-      //
-      // In the case of a type 3 relocation, we know that these are only generated
-      // for the safepoint polling page, or for the card type byte map base so we
-      // assert as much and of course that the offset is 0.
+      //       movk    Rx, #imm16<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      // In the first 3 cases we must check that Rx is the same in the adrp and the
+      // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
+      // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
+      // to be followed by a random unrelated ldr/str, add or movk instruction.
       //
       unsigned insn2 = ((unsigned*)branch)[1];
       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
@@ -123,13 +121,13 @@
         Instruction_aarch64::patch(branch + sizeof (unsigned),
                                    21, 10, offset_lo);
         instructions = 2;
-      } else {
-        assert((jbyte *)target ==
-                ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
-               target == StubRoutines::crc_table_addr() ||
-               (address)target == os::get_polling_page(),
-               "adrp must be polling page or byte map base");
-        assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
+      } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
+                   Instruction_aarch64::extract(insn, 4, 0) ==
+                     Instruction_aarch64::extract(insn2, 4, 0)) {
+        // movk #imm16<<32
+        Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
+        offset &= (1<<20)-1;
+        instructions = 2;
       }
     }
     int offset_lo = offset & 3;
@@ -212,16 +210,16 @@
       // Return the target address for the following sequences
       //   1 - adrp    Rx, target_page
       //       ldr/str Ry, [Rx, #offset_in_page]
-      //   2 - adrp    Rx, target_page         ]
+      //   2 - adrp    Rx, target_page
       //       add     Ry, Rx, #offset_in_page
       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //       movk    Rx, #imm12<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
       //
       // In the first two cases  we check that the register is the same and
       // return the target_page + the offset within the page.
       // Otherwise we assume it is a page aligned relocation and return
-      // the target page only. The only cases this is generated is for
-      // the safepoint polling page or for the card table byte map base so
-      // we assert as much.
+      // the target page only.
       //
       unsigned insn2 = ((unsigned*)insn_addr)[1];
       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
@@ -238,10 +236,12 @@
         unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
         return address(target_page + byte_offset);
       } else {
-        assert((jbyte *)target_page ==
-                ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
-               (address)target_page == os::get_polling_page(),
-               "adrp must be polling page or byte map base");
+        if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110  &&
+               Instruction_aarch64::extract(insn, 4, 0) ==
+                 Instruction_aarch64::extract(insn2, 4, 0)) {
+          target_page = (target_page & 0xffffffff) |
+                         ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
+        }
         return (address)target_page;
       }
     } else {
@@ -3964,22 +3964,26 @@
 
 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
   relocInfo::relocType rtype = dest.rspec().reloc()->type();
-  if (uabs(pc() - dest.target()) >= (1LL << 32)) {
-    guarantee(rtype == relocInfo::none
-              || rtype == relocInfo::external_word_type
-              || rtype == relocInfo::poll_type
-              || rtype == relocInfo::poll_return_type,
-              "can only use a fixed address with an ADRP");
-    // Out of range.  This doesn't happen very often, but we have to
-    // handle it
-    mov(reg1, dest);
-    byte_offset = 0;
+  unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
+  unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
+  unsigned long dest_page = (unsigned long)dest.target() >> 12;
+  long offset_low = dest_page - low_page;
+  long offset_high = dest_page - high_page;
+
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  // 8143067: Ensure that the adrp can reach the dest from anywhere within
+  // the code cache so that if it is relocated we know it will still reach
+  if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
+    _adrp(reg1, dest.target());
   } else {
-    InstructionMark im(this);
-    code_section()->relocate(inst_mark(), dest.rspec());
-    byte_offset = (uint64_t)dest.target() & 0xfff;
-    _adrp(reg1, dest.target());
+    unsigned long pc_page = (unsigned long)pc() >> 12;
+    long offset = dest_page - pc_page;
+    offset = (offset & ((1<<20)-1)) << 12;
+    _adrp(reg1, pc()+offset);
+    movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
   }
+  byte_offset = (unsigned long)dest.target() & 0xfff;
 }
 
 void MacroAssembler::build_frame(int framesize) {

--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -2384,6 +2384,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, rthread);
+  __ mov(c_rarg1, rcpool);
   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
@@ -2397,6 +2398,7 @@
   // Load UnrollBlock* into rdi
   __ mov(r5, r0);
 
+  __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    Label noException;
   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
   __ br(Assembler::NE, noException);
@@ -2609,6 +2611,7 @@
   // n.b. 2 gp args, 0 fp args, integral return type
 
   __ mov(c_rarg0, rthread);
+  __ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
   __ lea(rscratch1,
          RuntimeAddress(CAST_FROM_FN_PTR(address,
                                          Deoptimization::uncommon_trap)));
@@ -2628,6 +2631,16 @@
   // move UnrollBlock* into r4
   __ mov(r4, r0);
 
+#ifdef ASSERT
+  { Label L;
+    __ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+    __ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+    __ br(Assembler::EQ, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)

--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -61,6 +61,7 @@
 define_pd_global(bool, UseCISCSpill,                 false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 // GL:
 // Detected a problem with unscaled compressed oops and
 // narrow_oop_use_complex_address() == false.

--- a/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -2697,7 +2697,7 @@
   // Provide a debugger breakpoint in the frame manager if breakpoints
   // in osr'd methods are requested.
 #ifdef COMPILER2
-  NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
+  if (OptoBreakpointOSR) { __ illtrap(); }
 #endif
 
   // Load callee's pointer to locals array from callee's state.

--- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -297,8 +297,16 @@
   __ bind(do_float);
   __ lfs(floatSlot, 0, arg_java);
 #if defined(LINUX)
+  // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
+  // in the least significant word of an argument slot.
+#if defined(VM_LITTLE_ENDIAN)
+  __ stfs(floatSlot, 0, arg_c);
+#else
   __ stfs(floatSlot, 4, arg_c);
+#endif
 #elif defined(AIX)
+  // Although AIX runs on big endian CPU, float is in most significant
+  // word of an argument slot.
   __ stfs(floatSlot, 0, arg_c);
 #else
 #error "unknown OS"

--- a/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_ppc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }

--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -2064,6 +2064,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -3416,7 +3427,7 @@
       // The stub for call to interpreter.
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -3465,7 +3476,7 @@
     // The stub for call to interpreter.
     address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
     if (stub == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -6911,7 +6922,7 @@
     n_compare->_opnds[0] = op_crx;
     n_compare->_opnds[1] = op_src;
     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
-    
+
     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
     n2->add_req(n_region, n_src, n1);
     n2->_opnds[0] = op_dst;
@@ -10588,7 +10599,7 @@
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
   // Needs matchrule, see cmpDUnordered.
-  match(Set crx (CmpF src1 src2)); 
+  match(Set crx (CmpF src1 src2));
   // no match-rule, false predicate
   predicate(false);
 
@@ -10697,13 +10708,13 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the 
-  // node right before the conditional move using it. 
+  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the
+  // node right before the conditional move using it.
   // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
   // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
   // crashed in register allocation where the flags Reg between cmpDUnoredered and a
   // conditional move was supposed to be spilled.
-  match(Set crx (CmpD src1 src2)); 
+  match(Set crx (CmpD src1 src2));
   // False predicate, shall not be matched.
   predicate(false);

--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -753,6 +753,21 @@
     // in farg_reg[j] if argument i is the j-th float argument of this call.
     //
     case T_FLOAT:
+#if defined(LINUX)
+      // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
+      // in the least significant word of an argument slot.
+#if defined(VM_LITTLE_ENDIAN)
+#define FLOAT_WORD_OFFSET_IN_SLOT 0
+#else
+#define FLOAT_WORD_OFFSET_IN_SLOT 1
+#endif
+#elif defined(AIX)
+      // Although AIX runs on big endian CPU, float is in the most
+      // significant word of an argument slot.
+#define FLOAT_WORD_OFFSET_IN_SLOT 0
+#else
+#error "unknown OS"
+#endif
       if (freg < Argument::n_float_register_parameters_c) {
         // Put float in register ...
         reg = farg_reg[freg];
@@ -766,14 +781,14 @@
         if (arg >= Argument::n_regs_not_on_stack_c) {
           // ... and on the stack.
           guarantee(regs2 != NULL, "must pass float in register and stack slot");
-          VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
+          VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
           regs2[i].set1(reg2);
           stk += inc_stk_for_intfloat;
         }
 
       } else {
         // Put float on stack.
-        reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
+        reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
         stk += inc_stk_for_intfloat;
       }
       regs[i].set1(reg);
@@ -2802,7 +2817,7 @@
   __ set_last_Java_frame(R1_SP, noreg);
 
   // With EscapeAnalysis turned on, this call may safepoint!
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
   address calls_return_pc = __ last_calls_return_pc();
   // Set an oopmap for the call site that describes all our saved registers.
   oop_maps->add_gc_map(calls_return_pc - start, map);
@@ -2815,6 +2830,8 @@
   // by save_volatile_registers(...).
   RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
 
+  // reload the exec mode from the UnrollBlock (it might have changed)
+  __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
   // In excp_deopt_mode, restore and clear exception oop which we
   // stored in the thread during exception entry above. The exception
   // oop will be the return value of this stub.
@@ -2945,8 +2962,9 @@
   __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
 
   __ mr(klass_index_reg, R3);
+  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
-                  R16_thread, klass_index_reg);
+                  R16_thread, klass_index_reg, R5_ARG3);
 
   // Set an oopmap for the call site.
   oop_maps->add_gc_map(gc_map_pc - start, map);
@@ -2966,6 +2984,12 @@
 
   // stack: (caller_of_deoptee, ...).
 
+#ifdef ASSERT
+  __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
+  __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
+#endif
+
   // Allocate new interpreter frame(s) and possibly a c2i adapter
   // frame.
   push_skeleton_frames(masm, false/*deopt*/,

--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -94,8 +94,10 @@
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
   __ set(_bci, G4);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ set_metadata_constant(m, G5);
   __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
-  __ delayed()->mov_or_nop(_method->as_register(), G5);
+  __ delayed()->nop();
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);

--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -2812,7 +2812,23 @@
 }
 
 void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
-  fatal("CRC32 intrinsic is not implemented on this platform");
+  assert(op->crc()->is_single_cpu(),  "crc must be register");
+  assert(op->val()->is_single_cpu(),  "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register table = op->result_opr()->as_register();
+  Register res   = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, table);
+
+  __ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+  __ not1(crc);
+  __ clruwu(crc);
+  __ update_byte_crc32(crc, val, table);
+  __ not1(crc);
+
+  __ mov(crc, res);
 }
 
 void LIR_Assembler::emit_lock(LIR_OpLock* op) {

--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,6 +68,7 @@
 
 LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::Oexception_opr;  }
 LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::Oissuing_pc_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()                  { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()                  { return new_register(T_OBJECT); }
 LIR_Opr LIRGenerator::getThreadTemp()                { return rlock_callee_saved(NOT_LP64(T_INT) LP64_ONLY(T_LONG)); }
 
@@ -785,7 +786,86 @@
 }
 
 void LIRGenerator::do_update_CRC32(Intrinsic* x) {
-  fatal("CRC32 intrinsic is not implemented on this platform");
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // val is destroyed by update_crc32
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if(off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+        offset += off.result()->as_jint();
+      }
+
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+        if (index->is_constant()) {
+          offset += index->as_constant_ptr()->as_jint();
+          index = LIR_OprFact::illegalOpr;
+        } else if (index->is_register()) {
+          LIR_Opr tmp2 = new_register(T_LONG);
+          LIR_Opr tmp3 = new_register(T_LONG);
+          __ move(base_op, tmp2);
+          __ move(index, tmp3);
+          __ add(tmp2, tmp3, tmp2);
+          base_op = tmp2;
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, offset, T_BYTE);
+
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      len.load_item_force(cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
 }
 
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f

--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -65,6 +65,7 @@
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoScheduling,               true);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 #ifdef _LP64
 // We need to make sure that all generated code is within

--- a/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,8 +43,9 @@
   void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
   void generate_counter_overflow(Label& Lcontinue);
 
+  address generate_CRC32_update_entry();
+  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+
   // Not supported
-  address generate_CRC32_update_entry() { return NULL; }
-  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
   address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
 #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP

--- a/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -29,7 +29,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_sparc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     return pc_offset + NativeCall::instruction_size;
   } else if (inst->is_call_reg()) {
@@ -37,12 +37,12 @@
   } else if (inst->is_sethi()) {
     return pc_offset + NativeFarCall::instruction_size;
   } else {
-    fatal("unsupported type of instruction for call site");
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
     return 0;
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -52,7 +52,7 @@
     RelocationHolder rspec = oop_Relocation::spec(oop_index);
     _instructions->relocate(pc, rspec, 1);
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
@@ -66,20 +66,20 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     NativeMovConstReg32* move = nativeMovConstReg32_at(pc);
-    narrowKlass narrowOop = record_narrow_metadata_reference(constant);
+    narrowKlass narrowOop = record_narrow_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)narrowOop);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at %p/%p", pc, narrowOop);
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
-    Metadata* reference = record_metadata_reference(constant);
+    Metadata* reference = record_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)reference);
     TRACE_jvmci_3("relocating (metaspace constant) at %p/%p", pc, reference);
   }
@@ -106,7 +106,7 @@
   }
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     NativeCall* call = nativeCall_at(pc);
@@ -117,17 +117,17 @@
     jump->set_jump_destination((address) foreign_call_destination);
     _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
   } else {
-    fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)));
+    JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
   }
   TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -156,33 +156,33 @@
       break;
     }
     default:
-      fatal("invalid _next_call_type value");
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_FAR:
       _instructions->relocate(pc, relocInfo::poll_type);
       break;
     case POLL_RETURN_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_RETURN_FAR:
       _instructions->relocate(pc, relocInfo::poll_return_type);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value");
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   // JVMCI Registers are numbered as follows:
   //   0..31: Thirty-two General Purpose registers (CPU Registers)
   //   32..63: Thirty-two single precision float registers
@@ -199,7 +199,7 @@
     } else if(jvmci_reg < 112) {
       floatRegisterNumber = 4 * (jvmci_reg - 96);
     } else {
-      fatal("Unknown jvmci register");
+      JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
     }
     return as_FloatRegister(floatRegisterNumber)->as_VMReg();
   }

--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -4771,3 +4771,243 @@
   movdtox(src, tmp1);
   reverse_bytes_32(tmp1, dst, tmp2);
 }
+
+void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset) {
+  xmulx(xcrc_hi, xK_hi, xtmp_lo);
+  xmulxhi(xcrc_hi, xK_hi, xtmp_hi);
+  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
+  xmulx(xcrc_lo, xK_lo, xcrc_lo);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+  ldxl(buf, G0, xtmp_lo);
+  inc(buf, 8);
+  ldxl(buf, G0, xtmp_hi);
+  inc(buf, 8);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+}
+
+void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo) {
+  mov(xcrc_lo, xtmp_lo);
+  mov(xcrc_hi, xtmp_hi);
+  xmulx(xtmp_hi, xK_hi, xtmp_lo);
+  xmulxhi(xtmp_hi, xK_hi, xtmp_hi);
+  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
+  xmulx(xcrc_lo, xK_lo, xcrc_lo);
+  xor3(xcrc_lo, xbuf_lo, xcrc_lo);
+  xor3(xcrc_hi, xbuf_hi, xcrc_hi);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+}
+
+void MacroAssembler::fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp) {
+  and3(xcrc, 0xFF, tmp);
+  sllx(tmp, 2, tmp);
+  lduw(table, tmp, xtmp);
+  srlx(xcrc, 8, xcrc);
+  xor3(xtmp, xcrc, xcrc);
+}
+
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+  and3(crc, 0xFF, tmp);
+  srlx(crc, 8, crc);
+  sllx(tmp, 2, tmp);
+  lduw(table, tmp, tmp);
+  xor3(tmp, crc, crc);
+}
+
+#define CRC32_TMP_REG_NUM 18
+
+#define CRC32_CONST_64  0x163cd6124
+#define CRC32_CONST_96  0x0ccaa009e
+#define CRC32_CONST_160 0x1751997d0
+#define CRC32_CONST_480 0x1c6e41596
+#define CRC32_CONST_544 0x154442bd4
+
+void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) {
+
+  Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check;
+  Label L_main_loop_prologue;
+  Label L_fold_512b, L_fold_512b_loop, L_fold_128b;
+  Label L_fold_tail, L_fold_tail_loop;
+  Label L_8byte_fold_loop, L_8byte_fold_check;
+
+  const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3};
+
+  Register const_64  = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_96  = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_160 = tmp[CRC32_TMP_REG_NUM-2];
+  Register const_480 = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_544 = tmp[CRC32_TMP_REG_NUM-2];
+
+  set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+
+  not1(crc); // ~c
+  clruwu(crc); // clear upper 32 bits of crc
+
+  // Check if below cutoff, proceed directly to cleanup code
+  mov(31, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
+
+  // Align buffer to 8 byte boundry
+  mov(8, O5);
+  and3(buf, 0x7, O4);
+  sub(O5, O4, O5);
+  and3(O5, 0x7, O5);
+  sub(len, O5, len);
+  ba(L_align_check);
+  delayed()->nop();
+
+  // Alignment loop, table look up method for up to 7 bytes
+  bind(L_align_loop);
+  ldub(buf, 0, O4);
+  inc(buf);
+  dec(O5);
+  xor3(O4, crc, O4);
+  and3(O4, 0xFF, O4);
+  sllx(O4, 2, O4);
+  lduw(table, O4, O4);
+  srlx(crc, 8, crc);
+  xor3(O4, crc, crc);
+  bind(L_align_check);
+  nop();
+  cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_align_loop);
+
+  // Aligned on 64-bit (8-byte) boundry at this point
+  // Check if still above cutoff (31-bytes)
+  mov(31, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
+  // At least 32 bytes left to process
+
+  // Free up registers by storing them to FP registers
+  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
+    movxtod(tmp[i], as_FloatRegister(2*i));
+  }
+
+  // Determine which loop to enter
+  // Shared prologue
+  ldxl(buf, G0, tmp[0]);
+  inc(buf, 8);
+  ldxl(buf, G0, tmp[1]);
+  inc(buf, 8);
+  xor3(tmp[0], crc, tmp[0]); // Fold CRC into first few bytes
+  and3(crc, 0, crc); // Clear out the crc register
+  // Main loop needs 128-bytes at least
+  mov(128, G4);
+  mov(64, tmp[2]);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_main_loop_prologue);
+  // Less than 64 bytes
+  nop();
+  cmp_and_br_short(len, tmp[2], Assembler::lessUnsigned, Assembler::pt, L_fold_tail);
+  // Between 64 and 127 bytes
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[4], tmp[5], buf, 16);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[6], tmp[7], buf, 32);
+  dec(len, 48);
+  ba(L_fold_tail);
+  delayed()->nop();
+
+  bind(L_main_loop_prologue);
+  for (int i = 2; i < 8; i++) {
+    ldxl(buf, G0, tmp[i]);
+    inc(buf, 8);
+  }
+
+  // Fold total 512 bits of polynomial on each iteration,
+  // 128 bits per each of 4 parallel streams
+  set64(CRC32_CONST_480, const_480, tmp[8]);
+  set64(CRC32_CONST_544, const_544, tmp[9]);
+
+  mov(128, G4);
+  bind(L_fold_512b_loop);
+  fold_128bit_crc32(tmp[1], tmp[0], const_480, const_544, tmp[9],  tmp[8],  buf,  0);
+  fold_128bit_crc32(tmp[3], tmp[2], const_480, const_544, tmp[11], tmp[10], buf, 16);
+  fold_128bit_crc32(tmp[5], tmp[4], const_480, const_544, tmp[13], tmp[12], buf, 32);
+  fold_128bit_crc32(tmp[7], tmp[6], const_480, const_544, tmp[15], tmp[14], buf, 64);
+  dec(len, 64);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_512b_loop);
+
+  // Fold 512 bits to 128 bits
+  bind(L_fold_512b);
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[3], tmp[2]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[5], tmp[4]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[7], tmp[6]);
+  dec(len, 48);
+
+  // Fold the rest of 128 bits data chunks
+  bind(L_fold_tail);
+  mov(32, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_fold_128b);
+
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+
+  bind(L_fold_tail_loop);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
+  sub(len, 16, len);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_tail_loop);
+
+  // Fold the 128 bits in tmps 0 - 1 into tmp 1
+  bind(L_fold_128b);
+
+  set64(CRC32_CONST_64, const_64, tmp[4]);
+
+  xmulx(const_64, tmp[0], tmp[2]);
+  xmulxhi(const_64, tmp[0], tmp[3]);
+
+  srl(tmp[2], G0, tmp[4]);
+  xmulx(const_64, tmp[4], tmp[4]);
+
+  srlx(tmp[2], 32, tmp[2]);
+  sllx(tmp[3], 32, tmp[3]);
+  or3(tmp[2], tmp[3], tmp[2]);
+
+  xor3(tmp[4], tmp[1], tmp[4]);
+  xor3(tmp[4], tmp[2], tmp[1]);
+  dec(len, 8);
+
+  // Use table lookup for the 8 bytes left in tmp[1]
+  dec(len, 8);
+
+  // 8 8-bit folds to compute 32-bit CRC.
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(tmp[1], table, tmp[2], tmp[3]);
+  }
+  srl(tmp[1], G0, crc); // move 32 bits to general register
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(crc, table, tmp[3]);
+  }
+
+  bind(L_8byte_fold_check);
+
+  // Restore int registers saved in FP registers
+  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
+    movdtox(as_FloatRegister(2*i), tmp[i]);
+  }
+
+  ba(L_cleanup_check);
+  delayed()->nop();
+
+  // Table look-up method for the remaining few bytes
+  bind(L_cleanup_loop);
+  ldub(buf, 0, O4);
+  inc(buf);
+  dec(len);
+  xor3(O4, crc, O4);
+  and3(O4, 0xFF, O4);
+  sllx(O4, 2, O4);
+  lduw(table, O4, O4);
+  srlx(crc, 8, crc);
+  xor3(O4, crc, crc);
+  bind(L_cleanup_check);
+  nop();
+  cmp_and_br_short(len, 0, Assembler::greaterUnsigned, Assembler::pt, L_cleanup_loop);
+
+  not1(crc);
+}
+

--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -904,7 +904,9 @@
   inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
 
   // little-endian
-  inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
+  inline void lduwl(Register s1, Register s2, Register d) { lduwa(s1, s2, ASI_PRIMARY_LITTLE, d); }
+  inline void ldswl(Register s1, Register s2, Register d) { ldswa(s1, s2, ASI_PRIMARY_LITTLE, d);}
+  inline void ldxl( Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
   inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); }
 
   // membar psuedo instruction.  takes into account target memory model.
@@ -1469,6 +1471,15 @@
   void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
   void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
 
+  // CRC32 code for java.util.zip.CRC32::updateBytes0() instrinsic.
+  void kernel_crc32(Register crc, Register buf, Register len, Register table);
+  // Fold 128-bit data chunk
+  void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset);
+  void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo);
+  // Fold 8-bit data
+  void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
+  void fold_8bit_crc32(Register crc, Register table, Register tmp);
+
 #undef VIRTUAL
 };

--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -3036,6 +3036,7 @@
 
     __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
     __ mov(G2_thread, O0);
+    __ mov(L0deopt_mode, O2);
     __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
     __ delayed()->nop();
     oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
@@ -3121,6 +3122,7 @@
   // do the call by hand so we can get the oopmap
 
   __ mov(G2_thread, L7_thread_cache);
+  __ mov(L0deopt_mode, O1);
   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
   __ delayed()->mov(G2_thread, O0);
 
@@ -3146,6 +3148,7 @@
 
   RegisterSaver::restore_result_registers(masm);
 
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), G4deopt_mode);
   Label noException;
   __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);
 
@@ -3269,7 +3272,8 @@
   __ save_frame(0);
   __ set_last_Java_frame(SP, noreg);
   __ mov(I0, O2klass_index);
-  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
+  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // exec mode
+  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index, O3);
   __ reset_last_Java_frame();
   __ mov(O0, O2UnrollBlock->after_save());
   __ restore();
@@ -3278,6 +3282,15 @@
   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
   __ restore();
 
+#ifdef ASSERT
+  { Label L;
+    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), O1);
+    __ cmp_and_br_short(O1, Deoptimization::Unpack_uncommon_trap, Assembler::equal, Assembler::pt, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Allocate new interpreter frame(s) and possible c2i adapter frame
 
   make_new_frames(masm, false);

--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -1860,6 +1860,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -1905,7 +1916,7 @@
 }
 
 // Current (2013) SPARC platforms need to read original key
-// to construct decryption expanded key 
+// to construct decryption expanded key
 const bool Matcher::pass_original_key_for_aes() {
   return true;
 }
@@ -2612,7 +2623,7 @@
       if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
-      } 
+      }
     }
   %}
 
@@ -3132,10 +3143,10 @@
 // AVOID_NONE   - instruction can be placed anywhere
 // AVOID_BEFORE - instruction cannot be placed after an
 //                instruction with MachNode::AVOID_AFTER
-// AVOID_AFTER  - the next instruction cannot be the one 
+// AVOID_AFTER  - the next instruction cannot be the one
 //                with MachNode::AVOID_BEFORE
-// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at 
-//                          the same time                                
+// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
+//                          the same time
 ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
 
 ins_attrib ins_short_branch(0);    // Required flag: is this instruction a

--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -5292,6 +5292,38 @@
     return start;
   }
 
+/**
+   *  Arguments:
+   *
+   * Inputs:
+   *   O0   - int   crc
+   *   O1   - byte* buf
+   *   O2   - int   len
+   *   O3   - int*  table
+   *
+   * Output:
+   *   O0   - int crc result
+   */
+  address generate_updateBytesCRC32() {
+    assert(UseCRC32Intrinsics, "need VIS3 instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+    address start = __ pc();
+
+    const Register crc   = O0; // crc
+    const Register buf   = O1; // source java byte array address
+    const Register len   = O2; // length
+    const Register table = O3; // crc_table address (reuse register)
+
+    __ kernel_crc32(crc, buf, len, table);
+
+    __ retl();
+    __ delayed()->nop();
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -5324,6 +5356,12 @@
 
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+
+    if (UseCRC32Intrinsics) {
+      // set table address before stub generation which use it
+      StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+    }
   }

--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,3 +52,98 @@
 address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows);
 
 address StubRoutines::Sparc::_partial_subtype_check = NULL;
+
+uint64_t StubRoutines::Sparc::_crc_by128_masks[] =
+{
+  /* The fields in this structure are arranged so that they can be
+   * picked up two at a time with 128-bit loads.
+   *
+   * Because of flipped bit order for this CRC polynomials
+   * the constant for X**N is left-shifted by 1.  This is because
+   * a 64 x 64 polynomial multiply produces a 127-bit result
+   * but the highest term is always aligned to bit 0 in the container.
+   * Pre-shifting by one fixes this, at the cost of potentially making
+   * the 32-bit constant no longer fit in a 32-bit container (thus the
+   * use of uint64_t, though this is also the size used by the carry-
+   * less multiply instruction.
+   *
+   * In addition, the flipped bit order and highest-term-at-least-bit
+   * multiply changes the constants used.  The 96-bit result will be
+   * aligned to the high-term end of the target 128-bit container,
+   * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
+   * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
+   *
+   * This cause additional problems in the 128-to-64-bit reduction; see the
+   * code for details.  By storing a mask in the otherwise unused half of
+   * a 128-bit constant, bits can be cleared before multiplication without
+   * storing and reloading.  Note that staying on a 128-bit datapath means
+   * that some data is uselessly stored and some unused data is intersected
+   * with an irrelevant constant.
+   */
+
+  ((uint64_t) 0xffffffffUL),     /* low  of K_M_64    */
+  ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64    */
+  ((uint64_t) 0xba8ccbe8U << 1), /* low  of K_160_96  */
+  ((uint64_t) 0x6655004fU << 1), /* high of K_160_96  */
+  ((uint64_t) 0xaa2215eaU << 1), /* low  of K_544_480 */
+  ((uint64_t) 0xe3720acbU << 1)  /* high of K_544_480 */
+};
+
+/**
+ *  crc_table[] from jdk/src/java.base/share/native/libzip/zlib-1.2.8/crc32.h
+ */
+juint StubRoutines::Sparc::_crc_table[] =
+{
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+};

--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -53,6 +53,9 @@
   static address _flush_callers_register_windows_entry;
 
   static address _partial_subtype_check;
+  // masks and table for CRC32
+  static uint64_t _crc_by128_masks[];
+  static juint    _crc_table[];
 
  public:
   // test assembler stop routine by setting registers
@@ -65,6 +68,8 @@
   static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); }
 
   static address partial_subtype_check()                  { return _partial_subtype_check; }
+
+  static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
 };
 
 #endif // CPU_SPARC_VM_STUBROUTINES_SPARC_HPP

--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -803,6 +803,106 @@
   return NULL;
 }
 
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
+    __ set(SafepointSynchronize::_not_synchronized, O3);
+    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+
+    // Load parameters
+    const Register crc   = O0; // initial crc
+    const Register val   = O1; // byte to update with
+    const Register table = O2; // address of 256-entry lookup table
+
+    __ ldub(Gargs, 3, val);
+    __ lduw(Gargs, 8, crc);
+
+    __ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+
+    __ not1(crc); // ~crc
+    __ clruwu(crc);
+    __ update_byte_crc32(crc, val, table);
+    __ not1(crc); // ~crc
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
+    __ set(SafepointSynchronize::_not_synchronized, O3);
+    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+
+    // Load parameters from the stack
+    const Register crc    = O0; // initial crc
+    const Register buf    = O1; // source java byte array address
+    const Register len    = O2; // len
+    const Register offset = O3; // offset
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ lduw(Gargs, 0,  len);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc);
+      __ add(buf, offset, buf);
+    } else {
+      __ lduw(Gargs, 0,  len);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf ,offset, buf);
+    }
+
+    // Call the crc32 kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32(crc, buf, len, O3);
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method

--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -229,35 +229,35 @@
 
   // SPARC T4 and above should have support for AES instructions
   if (has_aes()) {
-    if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3
-      if (FLAG_IS_DEFAULT(UseAES)) {
-        FLAG_SET_DEFAULT(UseAES, true);
+    if (FLAG_IS_DEFAULT(UseAES)) {
+      FLAG_SET_DEFAULT(UseAES, true);
+    }
+    if (!UseAES) {
+      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
       }
-      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
-      }
-      // we disable both the AES flags if either of them is disabled on the command line
-      if (!UseAES || !UseAESIntrinsics) {
-        FLAG_SET_DEFAULT(UseAES, false);
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    } else {
+      // The AES intrinsic stubs require AES instruction support (of course)
+      // but also require VIS3 mode or higher for instructions it use.
+      if (UseVIS > 2) {
+        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+        }
+      } else {
+        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("SPARC AES intrinsics require VIS3 instructions. Intrinsics will be disabled.");
+        }
         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
       }
-    } else {
-        if (UseAES || UseAESIntrinsics) {
-          warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
-          if (UseAES) {
-            FLAG_SET_DEFAULT(UseAES, false);
-          }
-          if (UseAESIntrinsics) {
-            FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-          }
-        }
     }
   } else if (UseAES || UseAESIntrinsics) {
-    warning("AES instructions are not available on this CPU");
-    if (UseAES) {
+    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
+      warning("AES instructions are not available on this CPU");
       FLAG_SET_DEFAULT(UseAES, false);
     }
-    if (UseAESIntrinsics) {
+    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      warning("AES intrinsics are not available on this CPU");
       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
     }
   }
@@ -347,6 +347,15 @@
     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
   }
 
+  if (UseVIS > 2) {
+    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+      FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
+    }
+  } else if (UseCRC32Intrinsics) {
+    warning("SPARC CRC32 intrinsics require VIS3 insructions support. Intriniscs will be disabled");
+    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
@@ -358,7 +367,6 @@
     FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
   }
 
-#ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
     tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
@@ -391,7 +399,6 @@
       tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
     }
   }
-#endif // PRODUCT
 }
 
 void VM_Version::print_features() {
@@ -400,7 +407,7 @@
 
 int VM_Version::determine_features() {
   if (UseV8InstrsOnly) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-V8"); }
     return generic_v8_m;
   }
 
@@ -416,12 +423,12 @@
     if (is_T_family(features)) {
       // Happy to accomodate...
     } else {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Niagara"); }
       features |= T_family_m;
     }
   } else {
     if (is_T_family(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Not-Niagara"); }
       features &= ~(T_family_m | T1_model_m);
     } else {
       // Happy to accomodate...

--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -313,7 +313,7 @@
     switch (cur_tuple_type) {
     case EVEX_FV:
       if ((cur_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
@@ -394,25 +394,27 @@
   int mod_idx = 0;
   // We will test if the displacement fits the compressed format and if so
   // apply the compression to the displacment iff the result is8bit.
-  if (VM_Version::supports_evex() && _is_evex_instruction) {
-    switch (_tuple_type) {
+  if (VM_Version::supports_evex() && (_attributes != NULL) && _attributes->is_evex_instruction()) {
+    int evex_encoding = _attributes->get_evex_encoding();
+    int tuple_type = _attributes->get_tuple_type();
+    switch (tuple_type) {
     case EVEX_FV:
-      if ((_evex_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      if ((evex_encoding & VEX_W) == VEX_W) {
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
-        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
       break;
 
     case EVEX_HV:
-      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       break;
 
     case EVEX_FVM:
       break;
 
     case EVEX_T1S:
-      switch (_input_size_in_bits) {
+      switch (_attributes->get_input_size()) {
       case EVEX_8bit:
         break;
 
@@ -433,7 +435,7 @@
     case EVEX_T1F:
     case EVEX_T2:
     case EVEX_T4:
-      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
+      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
       break;
 
     case EVEX_T8:
@@ -459,8 +461,9 @@
       break;
     }
 
-    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
-      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
+    int vector_len = _attributes->get_vector_len();
+    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
       if ((disp % disp_factor) == 0) {
         int new_disp = disp / disp_factor;
         if (is8bit(new_disp)) {
@@ -591,7 +594,6 @@
       emit_data(disp, rspec, disp32_operand);
     }
   }
-  _is_evex_instruction = false;
 }
 
 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
@@ -770,7 +772,7 @@
     case 0x55: // andnps
     case 0x56: // orps
     case 0x57: // xorps
-    case 0x59: //mulpd
+    case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@@ -1234,51 +1236,53 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_int8(0xC0 | encode);
 }
@@ -1286,16 +1290,16 @@
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1303,16 +1307,16 @@
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_int8(0xC0 | encode);
 }
@@ -1320,16 +1324,16 @@
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1361,15 +1365,17 @@
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnl(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -1396,45 +1402,51 @@
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsil(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -1581,36 +1593,38 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cpuid() {
@@ -1699,100 +1713,113 @@
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE6);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1F;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1807,36 +1834,38 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::emms() {
@@ -2082,36 +2111,26 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int dst_enc = dst->encoding();
-    int src_enc = src->encoding();
-    int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
-                                       /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2125,39 +2144,53 @@
 }
 
 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x12);
   emit_int8(0xC0 | encode);
-
+}
+
+void Assembler::kmovwl(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovdl(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovql(KRegister dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
-                                      /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovql(KRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int dst_enc = dst->encoding();
-  int nds_enc = 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)dst, src);
 }
 
 void Assembler::kmovql(Address dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int src_enc = src->encoding();
-  int nds_enc = 0;
-  vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)src, dst);
 }
@@ -2165,24 +2198,45 @@
 void Assembler::kmovql(KRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x92);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::kmovdl(KRegister dst, Register src) {
+// This instruction produces ZF or CF flags
+void Assembler::kortestbl(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512dq(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestwl(KRegister src1, KRegister src2) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
-  emit_int8((unsigned char)0x92);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::kmovwl(KRegister dst, Register src) {
-  NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
-  emit_int8((unsigned char)0x92);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestdl(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestql(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
@@ -2205,190 +2259,231 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
 void Assembler::movdl(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqa(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 // Move Unaligned 256bit Vector
 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::vmovdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
+void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F);
+  emit_operand(src, dst);
+}
+
+void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F);
+  emit_operand(src, dst);
+}
 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  // swap src<->dst for encoding
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
-  _tuple_type = EVEX_FVM;
-  vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  _tuple_type = EVEX_FVM;
-  // swap src<->dst for encoding
-  vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
@@ -2434,13 +2529,12 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x12);
+  emit_operand(dst, src);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -2466,13 +2560,9 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(dst, src);
 }
@@ -2480,14 +2570,9 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                VEX_OPCODE_0F, /* rex_w */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
@@ -2510,60 +2595,56 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
-  } else {
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
@@ -2655,36 +2736,38 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negl(Register dst) {
@@ -2985,28 +3068,35 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_operand(dst, src);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x00);
   emit_int8(0xC0 | encode);
   emit_int8(imm8);
@@ -3020,8 +3110,8 @@
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
-              /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_operand(dst, src);
   emit_int8(imm8);
@@ -3029,46 +3119,162 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
 }
 
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written the mask used to process the equal components
+void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
-                  false, (VM_Version::supports_avx512dq() == false));
-}
-
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
-                 false, (VM_Version::supports_avx512dq() == false));
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written the mask used to process the equal components
+void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written the mask used to process the equal components
+void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse4_1(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
+void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written the mask used to process the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written the mask used to process the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int dst_enc = kdst->encoding();
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_operand(as_Register(dst_enc), src);
 }
 
 void Assembler::pmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F,
-                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F, true, false);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3076,8 +3282,8 @@
 
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3085,8 +3291,8 @@
 
 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC5);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3094,8 +3300,8 @@
 
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3103,8 +3309,8 @@
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3112,8 +3318,8 @@
 
 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC4);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3121,29 +3327,29 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_HVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
+void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  bool vector256 = true;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
@@ -3246,43 +3452,41 @@
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_operand(dst, src);
 }
 
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3291,7 +3495,10 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
@@ -3299,12 +3506,10 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
-              VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3313,9 +3518,9 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
-  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -3324,9 +3529,9 @@
 void Assembler::pslldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
-  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -3336,16 +3541,16 @@
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3353,20 +3558,18 @@
 void Assembler::vptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
-             vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3374,42 +3577,47 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::push(int32_t imm32) {
@@ -3454,16 +3662,18 @@
 
 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rdtsc() {
@@ -3622,27 +3832,28 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::std() {
@@ -3651,11 +3862,12 @@
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -3705,38 +3917,38 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -3765,7 +3977,7 @@
   emit_arith(0x85, 0xC0, dst, src);
 }
 
-void Assembler::testl(Register dst, Address  src) {
+void Assembler::testl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
   emit_int8((unsigned char)0x85);
@@ -3792,36 +4004,38 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xabort(int8_t imm8) {
@@ -3903,138 +4117,162 @@
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 //====================VECTOR ARITHMETIC=====================================
@@ -4042,414 +4280,433 @@
 // Float-point vector arithmetic
 
 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulpd(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x15);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x14);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 // Integer vector arithmetic
 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -4457,280 +4714,324 @@
 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_operand(dst, src);
 }
 
 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
-                                      /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 2, "requires some form of AVX");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
@@ -4738,29 +5039,29 @@
 // Shift packed integers left by specified number of bits.
 void Assembler::psllw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::pslld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4768,102 +5069,111 @@
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers logically right by specified number of bits.
 void Assembler::psrlw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   // Do not confuse it with psrldq SSE2 instruction which
   // shifts 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4871,89 +5181,98 @@
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers arithmetically right by specified number of bits.
 void Assembler::psraw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4961,128 +5280,157 @@
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 // logical operations packed integers
 void Assembler::pand(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
-  }
-  else {
-    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5090,45 +5438,38 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
+  emit_int8(value & 0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_operand(dst, src);
+  // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5139,15 +5480,14 @@
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
+  assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5159,17 +5499,13 @@
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5178,11 +5514,9 @@
 
 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5192,16 +5526,12 @@
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
@@ -5210,11 +5540,10 @@
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5222,34 +5551,27 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5258,11 +5580,9 @@
 
 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5272,48 +5592,33 @@
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
   emit_int8(0x01);
 }
 
-void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode;
-  if (VM_Version::supports_avx512dq()) {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-  } else {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5322,42 +5627,36 @@
   emit_int8(value & 0x3);
 }
 
-void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+  emit_int8(value & 0x1);
+}
+
+void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-             /* vex_w */ true, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_operand(src, dst);
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_avx(), "");
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from bits 127:0
@@ -5369,13 +5668,11 @@
 
 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x00 - extract from bits 127:0
@@ -5387,11 +5684,8 @@
 
 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5402,10 +5696,9 @@
 
 // duplicate 4-bytes integer data from src into 8 locations in dest
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_avx2(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5413,189 +5706,170 @@
 // duplicate 2-bytes integer data from src into 16 locations in dest
 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F_38, false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_8bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_operand(dst, src);
 }
 
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_16bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_operand(dst, src);
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_operand(dst, src);
 }
 
 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_operand(dst, src);
 }
 
 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
 }
 
 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_operand(dst, src);
 }
 
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7B);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5603,8 +5877,8 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5613,8 +5887,9 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  int vector_len = AVX_128bit;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5622,11 +5897,9 @@
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
-  if (UseAVX < 3)
-  {
-    (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-    emit_int8(0x77);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x77);
 }
 
 
@@ -6130,8 +6403,7 @@
   if (pre > 0) {
     emit_int8(simd_pre[pre]);
   }
-  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
-                          prefix_and_encode(dst_enc, src_enc);
+  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
     emit_int8(0x0F);
     int opc2 = simd_opc[opc];
@@ -6143,7 +6415,9 @@
 }
 
 
-void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
+  int vector_len = _attributes->get_vector_len();
+  bool vex_w = _attributes->is_rex_vex_w();
   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
     prefix(VEX_3bytes);
 
@@ -6167,13 +6441,13 @@
 }
 
 // This is a 4 byte encoding
-void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                            int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                            bool is_extended_context, bool is_merge_context,
-                            int vector_len, bool no_mask_reg ){
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
   // EVEX 0x62 prefix
   prefix(EVEX_4bytes);
-  _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
+  bool vex_w = _attributes->is_rex_vex_w();
+  int evex_encoding = (vex_w ? VEX_W : 0);
+  // EVEX.b is not currently used for broadcast of single element or data rounding modes
+  _attributes->set_evex_encoding(evex_encoding);
 
   // P0: byte 2, initialized to RXBR`00mm
   // instead of not'd
@@ -6195,214 +6469,127 @@
   emit_int8(byte3);
 
   // P2: byte 4 as zL'Lbv'aaa
-  int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
   // EVEX.v` for extending EVEX.vvvv or VIDX
   byte4 |= (evex_v ? 0: EVEX_V);
   // third EXEC.b for broadcast actions
-  byte4 |= (is_extended_context ? EVEX_Rb : 0);
+  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
-  byte4 |= ((vector_len) & 0x3) << 5;
+  byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
   // last is EVEX.z for zero/merge actions
-  byte4 |= (is_merge_context ? EVEX_Z : 0);
+  byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
   emit_int8(byte4);
 }
 
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
-                           VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
   bool vex_b = adr.base_needs_rex();
   bool vex_x = adr.index_needs_rex();
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      attributes->set_is_legacy_mode();
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (xreg_enc >= 16);
     bool evex_v = (nds_enc >= 16);
-    _is_evex_instruction = true;
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
-  _instruction_uses_vl = false;
-}
-
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
-                                     bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
   bool vex_x = false;
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      if ((dst_enc >= 16) | (nds_enc >= 16) | (src_enc >= 16)) {
+        // up propagate arithmetic instructions to meet RA requirements
+        attributes->set_vector_len(AVX_512bit);
+      } else {
+        attributes->set_is_legacy_mode();
+      }
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (dst_enc >= 16);
     bool evex_v = (nds_enc >= 16);
     // can use vex_x as bank extender on rm encoding
     vex_x = (src_enc >= 16);
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
 
-  _instruction_uses_vl = false;
-
   // return modrm byte components for operands
   return (((dst_enc & 7) << 3) | (src_enc & 7));
 }
 
 
 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
-                            bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                            VexOpcode opc, InstructionAttr *attributes) {
   if (UseAVX > 0) {
     int xreg_enc = xreg->encoding();
-    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
   } else {
     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
-    rex_prefix(adr, xreg, pre, opc, rex_w);
+    rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
-    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
+    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, pre, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                                 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  InstructionMark im(this);
-  vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                                 VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)(0xF & cop));
@@ -6411,7 +6598,9 @@
 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4B);
   emit_int8((unsigned char)(0xC0 | encode));
   int src2_enc = src2->encoding();
@@ -6430,7 +6619,7 @@
   leal(dst, src);
 }
 
-void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
+void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
@@ -6948,15 +7137,17 @@
 
 void Assembler::andnq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -6983,45 +7174,51 @@
 
 void Assembler::blsiq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsiq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -7095,45 +7292,44 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7316,7 +7512,8 @@
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7324,8 +7521,9 @@
 void Assembler::movdq(Register dst, XMMRegister src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7458,8 +7656,8 @@
 
 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
-                                    /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF6);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7621,8 +7819,8 @@
 
 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0xF0);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);

--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -438,6 +438,8 @@
 
 };
 
+class InstructionAttr;
+
 // 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
 // See fxsave and xsave(EVEX enabled) documentation for layout
 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
@@ -568,7 +570,8 @@
     EVEX_8bit  = 0,
     EVEX_16bit = 1,
     EVEX_32bit = 2,
-    EVEX_64bit = 3
+    EVEX_64bit = 3,
+    EVEX_NObit = 4
   };
 
   enum WhichOperand {
@@ -598,16 +601,12 @@
 
 private:
 
-  int _evex_encoding;
-  int _input_size_in_bits;
-  int _avx_vector_len;
-  int _tuple_type;
-  bool _is_evex_instruction;
   bool _legacy_mode_bw;
   bool _legacy_mode_dq;
   bool _legacy_mode_vl;
   bool _legacy_mode_vlbw;
-  bool _instruction_uses_vl;
+
+  class InstructionAttr *_attributes;
 
   // 64bit prefixes
   int prefix_and_encode(int reg_enc, bool byteinst = false);
@@ -637,181 +636,30 @@
   int  rex_prefix_and_encode(int dst_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 
-  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
-                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                  int vector_len);
-
-  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                   bool is_extended_context, bool is_merge_context,
-                   int vector_len, bool no_mask_reg );
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
+
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
+                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                   VexSimdPrefix pre, VexOpcode opc,
-                  bool vex_w, int vector_len,
-                  bool legacy_mode = false, bool no_mask_reg = false);
-
-  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
-                  VexSimdPrefix pre, int vector_len = AVX_128bit,
-                  bool no_mask_reg = false, bool legacy_mode = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                    VexSimdPrefix pre, int vector_len = AVX_128bit,
-                    bool no_mask_reg = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  }
-
-  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
+                  InstructionAttr *attributes);
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             bool vex_w, int vector_len,
-                             bool legacy_mode, bool no_mask_reg);
-
-  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-      VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-      true, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 true, no_mask_reg);
-  }
-
-  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, int vector_len = AVX_128bit,
-                             VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
-                             bool no_mask_reg = false) {
-    int src_enc = src->encoding();
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
-                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
-                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
-
-  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
-                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
-    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-    simd_prefix(src, dst, pre, no_mask_reg);
-  }
-  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                     VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit,
-                             bool legacy_mode = false);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  // Move/convert 32-bit integer value.
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg) {
-    // It is OK to cast from Register to XMMRegister to pass argument here
-    // since only encoding is used in simd_prefix_and_encode() and number of
-    // Gen and Xmm registers are the same.
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
-  }
-  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                             bool no_mask_reg = false) {
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  // Move/convert 64-bit integer value.
-  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
-                               VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
-                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                               bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
-  }
+                             InstructionAttr *attributes);
+
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+                   VexOpcode opc, InstructionAttr *attributes);
+
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
 
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -821,27 +669,6 @@
   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
-  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      Address src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        Address src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      XMMRegister src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        XMMRegister src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-
   bool emit_compressed_disp_byte(int &disp);
 
   void emit_operand(Register reg,
@@ -986,18 +813,16 @@
   // belong in macro assembler but there is no need for both varieties to exist
 
   void init_attributes(void) {
-    _evex_encoding = 0;
-    _input_size_in_bits = 0;
-    _avx_vector_len = AVX_NoVec;
-    _tuple_type = EVEX_ETUP;
-    _is_evex_instruction = false;
     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
-    _instruction_uses_vl = false;
+    _attributes = NULL;
   }
 
+  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
+  void clear_attributes(void) { _attributes = NULL; }
+
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -1506,13 +1331,18 @@
 
   void movddup(XMMRegister dst, XMMRegister src);
 
+  void kmovwl(KRegister dst, Register src);
+  void kmovdl(KRegister dst, Register src);
   void kmovql(KRegister dst, KRegister src);
   void kmovql(KRegister dst, Register src);
-  void kmovdl(KRegister dst, Register src);
-  void kmovwl(KRegister dst, Register src);
   void kmovql(Address dst, KRegister src);
   void kmovql(KRegister dst, Address src);
 
+  void kortestbl(KRegister dst, KRegister src);
+  void kortestwl(KRegister dst, KRegister src);
+  void kortestdl(KRegister dst, KRegister src);
+  void kortestql(KRegister dst, KRegister src);
+
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
@@ -1537,6 +1367,12 @@
   void vmovdqu(XMMRegister dst, XMMRegister src);
 
    // Move Unaligned 512bit Vector
+  void evmovdqub(Address dst, XMMRegister src, int vector_len);
+  void evmovdqub(XMMRegister dst, Address src, int vector_len);
+  void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
+  void evmovdquw(Address dst, XMMRegister src, int vector_len);
+  void evmovdquw(XMMRegister dst, Address src, int vector_len);
+  void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
   void evmovdqul(Address dst, XMMRegister src, int vector_len);
   void evmovdqul(XMMRegister dst, Address src, int vector_len);
   void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1682,8 +1518,22 @@
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
 
+  void pcmpeqb(XMMRegister dst, XMMRegister src);
+  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
   void pcmpeqw(XMMRegister dst, XMMRegister src);
   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void pcmpeqd(XMMRegister dst, XMMRegister src);
+  void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void pcmpeqq(XMMRegister dst, XMMRegister src);
+  void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
 
   void pmovmskb(Register dst, XMMRegister src);
   void vpmovmskb(Register dst, XMMRegister src);
@@ -1704,7 +1554,7 @@
   void pmovzxbw(XMMRegister dst, XMMRegister src);
   void pmovzxbw(XMMRegister dst, Address src);
 
-  void vpmovzxbw(XMMRegister dst, Address src);
+  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
 
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
@@ -2106,12 +1956,12 @@
   void vextracti128h(Address dst, XMMRegister src);
 
   // Copy low 256bit into high 256bit of ZMM registers.
-  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vextracti64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(Address dst, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, Address src);
+  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(Address dst, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, Address src, int value);
 
   // Copy targeted 128bit segments of the ZMM registers
   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
@@ -2173,4 +2023,95 @@
 
 };
 
+// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
+// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
+// are applied.
+class InstructionAttr {
+public:
+  InstructionAttr(
+    int vector_len,
+    bool rex_vex_w,
+    bool legacy_mode,
+    bool no_reg_mask,
+    bool uses_vl)
+    :
+      _avx_vector_len(vector_len),
+      _rex_vex_w(rex_vex_w),
+      _legacy_mode(legacy_mode),
+      _no_reg_mask(no_reg_mask),
+      _uses_vl(uses_vl),
+      _tuple_type(Assembler::EVEX_ETUP),
+      _input_size_in_bits(Assembler::EVEX_NObit),
+      _is_evex_instruction(false),
+      _evex_encoding(0),
+      _is_clear_context(false),
+      _is_extended_context(false),
+      _current_assembler(NULL) {
+    if (UseAVX < 3) _legacy_mode = true;
+  }
+
+  ~InstructionAttr() {
+    if (_current_assembler != NULL) {
+      _current_assembler->clear_attributes();
+    }
+    _current_assembler = NULL;
+  }
+
+private:
+  int  _avx_vector_len;
+  bool _rex_vex_w;
+  bool _legacy_mode;
+  bool _no_reg_mask;
+  bool _uses_vl;
+  int  _tuple_type;
+  int  _input_size_in_bits;
+  bool _is_evex_instruction;
+  int  _evex_encoding;
+  bool _is_clear_context;
+  bool _is_extended_context;
+
+  Assembler *_current_assembler;
+
+public:
+  // query functions for field accessors
+  int  get_vector_len(void) const { return _avx_vector_len; }
+  bool is_rex_vex_w(void) const { return _rex_vex_w; }
+  bool is_legacy_mode(void) const { return _legacy_mode; }
+  bool is_no_reg_mask(void) const { return _no_reg_mask; }
+  bool uses_vl(void) const { return _uses_vl; }
+  int  get_tuple_type(void) const { return _tuple_type; }
+  int  get_input_size(void) const { return _input_size_in_bits; }
+  int  is_evex_instruction(void) const { return _is_evex_instruction; }
+  int  get_evex_encoding(void) const { return _evex_encoding; }
+  bool is_clear_context(void) const { return _is_clear_context; }
+  bool is_extended_context(void) const { return _is_extended_context; }
+
+  // Set the vector len manually
+  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
+
+  // Set the instruction to be encoded in AVX mode
+  void set_is_legacy_mode(void) { _legacy_mode = true; }
+
+  // Set the current instuction to be encoded as an EVEX instuction
+  void set_is_evex_instruction(void) { _is_evex_instruction = true; }
+
+  // Internal encoding data used in compressed immediate offset programming
+  void set_evex_encoding(int value) { _evex_encoding = value; }
+
+  // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+  void set_is_clear_context(void) { _is_clear_context = true; }
+
+  // Map back to current asembler so that we can manage object level assocation
+  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
+
+  // Address modifiers used for compressed displacement calculation
+  void set_address_attributes(int tuple_type, int input_size_in_bits) {
+    if (VM_Version::supports_evex()) {
+      _tuple_type = tuple_type;
+      _input_size_in_bits = input_size_in_bits;
+    }
+  }
+
+};
+
 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP

--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -81,7 +81,8 @@
 
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  ce->store_parameter(_method->as_register(), 1);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  ce->store_parameter(m, 1);
   ce->store_parameter(_bci, 0);
   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);

--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -2971,6 +2971,14 @@
 }
 
 
+void LIR_Assembler::store_parameter(Metadata* m,  int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ mov_metadata(Address(rsp, offset_from_rsp_in_bytes), m);
+}
+
+
 // This code replaces a call to arraycopy; no exception may
 // be thrown in this code, they must be thrown in the System.arraycopy
 // activation frame; we could save some checks if this would not be the case
@@ -3711,7 +3719,7 @@
     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
                    ExternalAddress((address)float_signflip_pool));
     } else {
@@ -3722,7 +3730,7 @@
     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
                    ExternalAddress((address)double_signflip_pool));
     } else {

--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,9 +49,10 @@
                            Register recv, Label* update_done);
 public:
 
-  void store_parameter(Register r, int offset_from_esp_in_words);
-  void store_parameter(jint c,     int offset_from_esp_in_words);
-  void store_parameter(jobject c,  int offset_from_esp_in_words);
+  void store_parameter(Register r,  int offset_from_esp_in_words);
+  void store_parameter(jint c,      int offset_from_esp_in_words);
+  void store_parameter(jobject c,   int offset_from_esp_in_words);
+  void store_parameter(Metadata* c, int offset_from_esp_in_words);
 
   enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28),
          exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),

--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,6 +80,7 @@
 LIR_Opr LIRGenerator::divOutOpr()       { return FrameMap::rax_opr; }
 LIR_Opr LIRGenerator::remOutOpr()       { return FrameMap::rdx_opr; }
 LIR_Opr LIRGenerator::shiftCountOpr()   { return FrameMap::rcx_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::rax_opr; }
 LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }

--- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -84,6 +84,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            true);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  true);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);

--- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -58,6 +58,4 @@
       OptoReg::invalidate(i);
     }
   }
-
-  SuperWordLoopUnrollAnalysis = true;
 }

--- a/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -36,7 +36,7 @@
 #include "code/vmreg.hpp"
 #include "vmreg_x86.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
     return (pc_offset + NativeCall::instruction_size);
@@ -53,18 +53,17 @@
     return (offset);
   } else if (inst->is_call_reg()) {
     // the inlined vtable stub contains a "call register" instruction
-    assert(method != NULL, "only valid for virtual calls");
+    assert(method.not_null(), "only valid for virtual calls");
     return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
   } else if (inst->is_cond_jump()) {
     address pc = (address) (inst);
     return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
   } else {
-    fatal("unsupported type of instruction for call site");
-    return 0;
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -75,7 +74,7 @@
     _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
     TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
@@ -85,19 +84,19 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
-    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant);
+    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
-    *((Metadata**) operand) = record_metadata_reference(constant);
+    *((Metadata**) operand) = record_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
   }
 }
@@ -117,7 +116,7 @@
   TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     // NOTE: for call without a mov, the offset must fit a 32-bit immediate
@@ -139,18 +138,18 @@
     *(jint*) disp += ((address) foreign_call_destination) - old_dest;
     _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
   } else {
-    fatal("unsupported relocation for foreign call");
+    JVMCI_ERROR("unsupported relocation for foreign call");
   }
 
   TRACE_jvmci_3("relocating (foreign call)  at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -185,6 +184,7 @@
       break;
     }
     default:
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
@@ -198,7 +198,7 @@
 }
 
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR: {
       relocate_poll_near(pc);
@@ -222,13 +222,13 @@
       _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value: %d", mark);
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   if (jvmci_reg < RegisterImpl::number_of_registers) {
     return as_Register(jvmci_reg)->as_VMReg();
   } else {
@@ -236,8 +236,7 @@
     if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) {
       return as_XMMRegister(floatRegisterNumber)->as_VMReg();
     }
-    ShouldNotReachHere();
-    return NULL;
+    JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
   }
 }

--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -3651,12 +3651,71 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    Assembler::movdqu(dst, as_Address(src));
+    movdqu(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    Assembler::movdqu(dst, Address(rscratch1, 0));
+    movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  }
+  else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    vmovdqu(dst, as_Address(src));
+  }
+  else {
+    lea(rscratch1, src);
+    vmovdqu(dst, Address(rscratch1, 0));
   }
 }
 
@@ -3726,6 +3785,10 @@
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 }
 
+#ifdef _LP64
+#define XSTATE_BV 0x200
+#endif
+
 void MacroAssembler::pop_CPU_state() {
   pop_FPU_state();
   pop_IU_state();
@@ -3735,27 +3798,7 @@
 #ifndef _LP64
   frstor(Address(rsp, 0));
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to map the XSAVE area
-    // for restoring registers as described via XCR0.
-    movl(rdx,VM_Version::get_xsave_header_upper_segment());
-    movl(rax,VM_Version::get_xsave_header_lower_segment());
-    xrstor(Address(rsp, 0));
-  } else {
-    fxrstor(Address(rsp, 0));
-  }
+  fxrstor(Address(rsp, 0));
 #endif
   addptr(rsp, FPUStateSizeInWords * wordSize);
 }
@@ -3773,49 +3816,13 @@
   push_FPU_state();
 }
 
-#ifdef _LP64
-#define XSTATE_BV 0x200
-#endif
-
 void MacroAssembler::push_FPU_state() {
   subptr(rsp, FPUStateSizeInWords * wordSize);
 #ifndef _LP64
   fnsave(Address(rsp, 0));
   fwait();
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // Save a copy of EAX and EDX
-    push(rax);
-    push(rdx);
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to program XSAVE area
-    // for saving the required registers as defined in XCR0.
-    int xcr0_edx = VM_Version::get_xsave_header_upper_segment();
-    int xcr0_eax = VM_Version::get_xsave_header_lower_segment();
-    movl(rdx,xcr0_edx);
-    movl(rax,xcr0_eax);
-    xsave(Address(rsp, wordSize*2));
-    // now Apply control bits and clear bytes 8..23 in the header
-    pop(rdx);
-    pop(rax);
-    movl(Address(rsp, XSTATE_BV), xcr0_eax);
-    movl(Address(rsp, XSTATE_BV+4), xcr0_edx);
-    andq(Address(rsp, XSTATE_BV+8), 0);
-    andq(Address(rsp, XSTATE_BV+16), 0);
-  } else {
-    fxsave(Address(rsp, 0));
-  }
+  fxsave(Address(rsp, 0));
 #endif // LP64
 }
 
@@ -3942,6 +3949,236 @@
   testl(dst, as_Address(src));
 }
 
+void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pcmpeqb(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpeqb(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpeqb(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpeqb(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpeqb(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pcmpeqw(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpeqw(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpeqw(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpeqw(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpeqw(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+  int dst_enc = dst->encoding();
+  if (dst_enc < 16) {
+    Assembler::pcmpestri(dst, src, imm8);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpestri(xmm0, src, imm8);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpestri(dst, src, imm8);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpestri(xmm0, src, imm8);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpestri(dst, xmm0, imm8);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpestri(xmm1, xmm0, imm8);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pmovzxbw(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pmovzxbw(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pmovzxbw(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pmovzxbw(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pmovzxbw(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
+  int dst_enc = dst->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pmovzxbw(dst, src);
+  } else if (dst_enc < 16) {
+    Assembler::pmovzxbw(dst, src);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pmovzxbw(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
+  int src_enc = src->encoding();
+  if (src_enc < 16) {
+    Assembler::pmovmskb(dst, src);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pmovmskb(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::ptest(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::ptest(xmm0, src);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::ptest(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::ptest(xmm1, xmm0);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::sqrtsd(dst, as_Address(src));
@@ -4007,6 +4244,23 @@
   }
 }
 
+void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
+    Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
+  }
+  else {
+    Assembler::xorpd(dst, src);
+  }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
+    Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::xorps(dst, src);
+  }
+}
+
 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
   // Used in sign-bit flipping with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
@@ -4050,6 +4304,864 @@
   }
 }
 
+void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (nds_enc < 16)) {
+    vandps(dst, nds, negate_field, vector_len);
+  } else if ((src_enc < 16) && (dst_enc < 16)) {
+    movss(src, nds);
+    vandps(dst, src, negate_field, vector_len);
+  } else if (src_enc < 16) {
+    movss(src, nds);
+    vandps(src, src, negate_field, vector_len);
+    movss(dst, src);
+  } else if (dst_enc < 16) {
+    movdqu(src, xmm0);
+    movss(xmm0, nds);
+    vandps(dst, xmm0, negate_field, vector_len);
+    movdqu(xmm0, src);
+  } else if (nds_enc < 16) {
+    movdqu(src, xmm0);
+    vandps(xmm0, nds, negate_field, vector_len);
+    movss(dst, xmm0);
+    movdqu(xmm0, src);
+  } else {
+    movdqu(src, xmm0);
+    movss(xmm0, nds);
+    vandps(xmm0, xmm0, negate_field, vector_len);
+    movss(dst, xmm0);
+    movdqu(xmm0, src);
+  }
+}
+
+void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (nds_enc < 16)) {
+    vandpd(dst, nds, negate_field, vector_len);
+  } else if ((src_enc < 16) && (dst_enc < 16)) {
+    movsd(src, nds);
+    vandpd(dst, src, negate_field, vector_len);
+  } else if (src_enc < 16) {
+    movsd(src, nds);
+    vandpd(src, src, negate_field, vector_len);
+    movsd(dst, src);
+  } else if (dst_enc < 16) {
+    movdqu(src, xmm0);
+    movsd(xmm0, nds);
+    vandpd(dst, xmm0, negate_field, vector_len);
+    movdqu(xmm0, src);
+  } else if (nds_enc < 16) {
+    movdqu(src, xmm0);
+    vandpd(xmm0, nds, negate_field, vector_len);
+    movsd(dst, xmm0);
+    movdqu(xmm0, src);
+  } else {
+    movdqu(src, xmm0);
+    movsd(xmm0, nds);
+    vandpd(xmm0, xmm0, negate_field, vector_len);
+    movsd(dst, xmm0);
+    movdqu(xmm0, src);
+  }
+}
+
+void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddb(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpaddb(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpaddb(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpaddb(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddb(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpaddb(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpaddw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpaddw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpaddw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpaddw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpbroadcastw(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpbroadcastw(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpbroadcastw(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpbroadcastw(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::vpbroadcastw(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  assert(dst_enc == nds_enc, "");
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpcmpeqb(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpcmpeqb(dst, nds, src, vector_len);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  assert(dst_enc == nds_enc, "");
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpcmpeqw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpcmpeqw(dst, nds, src, vector_len);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpmovzxbw(dst, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpmovzxbw(dst, src, vector_len);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpmovzxbw(xmm0, src, vector_len);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
+  int src_enc = src->encoding();
+  if (src_enc < 16) {
+    Assembler::vpmovmskb(dst, src);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpmovmskb(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpmullw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpmullw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpmullw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpmullw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpmullw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpmullw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubb(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpsubb(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpsubb(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpsubb(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubb(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsubb(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpsubw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpsubw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpsubw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsubw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsraw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsraw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsraw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsraw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsrlw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsrlw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsrlw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsrlw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsllw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsllw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsllw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsllw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vptest(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vptest(xmm0, src);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vptest(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::vptest(xmm1, xmm0);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+// This instruction exists within macros, ergo we cannot control its input
+// when emitted through those patterns.
+void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
+  if (VM_Version::supports_avx512nobw()) {
+    int dst_enc = dst->encoding();
+    int src_enc = src->encoding();
+    if (dst_enc == src_enc) {
+      if (dst_enc < 16) {
+        Assembler::punpcklbw(dst, src);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, xmm0);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    } else {
+      if ((src_enc < 16) && (dst_enc < 16)) {
+        Assembler::punpcklbw(dst, src);
+      } else if (src_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, src);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else if (dst_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, src, Assembler::AVX_512bit);
+        Assembler::punpcklbw(dst, xmm0);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        evmovdqul(xmm1, src, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, xmm1);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    }
+  } else {
+    Assembler::punpcklbw(dst, src);
+  }
+}
+
+// This instruction exists within macros, ergo we cannot control its input
+// when emitted through those patterns.
+void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
+  if (VM_Version::supports_avx512nobw()) {
+    int dst_enc = dst->encoding();
+    int src_enc = src->encoding();
+    if (dst_enc == src_enc) {
+      if (dst_enc < 16) {
+        Assembler::pshuflw(dst, src, mode);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, xmm0, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    } else {
+      if ((src_enc < 16) && (dst_enc < 16)) {
+        Assembler::pshuflw(dst, src, mode);
+      } else if (src_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, src, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else if (dst_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, src, Assembler::AVX_512bit);
+        Assembler::pshuflw(dst, xmm0, mode);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        evmovdqul(xmm1, src, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, xmm1, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    }
+  } else {
+    Assembler::pshuflw(dst, src, mode);
+  }
+}
+
 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
     vandpd(dst, nds, as_Address(src), vector_len);
@@ -4133,31 +5245,16 @@
       subptr(rsp, 64);
       evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       movflt(xmm0, nds);
-      if (reachable(src)) {
-        vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
+      vxorps(xmm0, xmm0, src, Assembler::AVX_128bit);
       movflt(dst, xmm0);
       evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       addptr(rsp, 64);
     } else {
       movflt(dst, nds);
-      if (reachable(src)) {
-        vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
-    }
-  } else {
-    if (reachable(src)) {
-      vxorps(dst, nds, as_Address(src), Assembler::AVX_128bit);
-    } else {
-      lea(rscratch1, src);
-      vxorps(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
-    }
+      vxorps(dst, dst, src, Assembler::AVX_128bit);
+    }
+  } else {
+    vxorps(dst, nds, src, Assembler::AVX_128bit);
   }
 }
 
@@ -4172,31 +5269,16 @@
       subptr(rsp, 64);
       evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       movdbl(xmm0, nds);
-      if (reachable(src)) {
-        vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
+      vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit);
       movdbl(dst, xmm0);
       evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       addptr(rsp, 64);
     } else {
       movdbl(dst, nds);
-      if (reachable(src)) {
-        vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
-    }
-  } else {
-    if (reachable(src)) {
-      vxorpd(dst, nds, as_Address(src), Assembler::AVX_128bit);
-    } else {
-      lea(rscratch1, src);
-      vxorpd(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
-    }
+      vxorpd(dst, dst, src, Assembler::AVX_128bit);
+    }
+  } else {
+    vxorpd(dst, nds, src, Assembler::AVX_128bit);
   }
 }
 
@@ -4688,7 +5770,6 @@
   pusha();
 
   // if we are coming from c1, xmm registers may be live
-  int off = 0;
   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
   if (UseAVX > 2) {
     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
@@ -4697,7 +5778,7 @@
   if (UseSSE == 1)  {
     subptr(rsp, sizeof(jdouble)*8);
     for (int n = 0; n < 8; n++) {
-      movflt(Address(rsp, off++*sizeof(jdouble)), as_XMMRegister(n));
+      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
     }
   } else if (UseSSE >= 2)  {
     if (UseAVX > 2) {
@@ -4709,37 +5790,35 @@
 #ifdef COMPILER2
     if (MaxVectorSize > 16) {
       if(UseAVX > 2) {
-        // Save upper half of ZMM registes
+        // Save upper half of ZMM registers
         subptr(rsp, 32*num_xmm_regs);
         for (int n = 0; n < num_xmm_regs; n++) {
-          vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
+          vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
         }
-        off = 0;
       }
       assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
-      // Save upper half of YMM registes
+      // Save upper half of YMM registers
       subptr(rsp, 16*num_xmm_regs);
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+        vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
       }
     }
 #endif
     // Save whole 128bit (16 bytes) XMM registers
     subptr(rsp, 16*num_xmm_regs);
-    off = 0;
 #ifdef _LP64
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf32x4h(Address(rsp, off++*16), as_XMMRegister(n), 0);
+        vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
       }
     } else {
       for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(Address(rsp, off++*16), as_XMMRegister(n));
+        movdqu(Address(rsp, n*16), as_XMMRegister(n));
       }
     }
 #else
     for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(Address(rsp, off++*16), as_XMMRegister(n));
+      movdqu(Address(rsp, n*16), as_XMMRegister(n));
     }
 #endif
   }
@@ -4808,44 +5887,40 @@
     addptr(rsp, sizeof(jdouble)*nb_args);
   }
 
-  off = 0;
   if (UseSSE == 1)  {
     for (int n = 0; n < 8; n++) {
-      movflt(as_XMMRegister(n), Address(rsp, off++*sizeof(jdouble)));
+      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
     }
     addptr(rsp, sizeof(jdouble)*8);
   } else if (UseSSE >= 2)  {
-    // Restore whole 128bit (16 bytes) XMM regiters
+    // Restore whole 128bit (16 bytes) XMM registers
 #ifdef _LP64
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf32x4h(as_XMMRegister(n), Address(rsp, off++*16), 0);
-      }
-    }
-    else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(as_XMMRegister(n), Address(rsp, off++*16));
-      }
-    }
+  if (VM_Version::supports_evex()) {
+    for (int n = 0; n < num_xmm_regs; n++) {
+      vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
+    }
+  } else {
+    for (int n = 0; n < num_xmm_regs; n++) {
+      movdqu(as_XMMRegister(n), Address(rsp, n*16));
+    }
+  }
 #else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(as_XMMRegister(n), Address(rsp, off++ * 16));
-    }
+  for (int n = 0; n < num_xmm_regs; n++) {
+    movdqu(as_XMMRegister(n), Address(rsp, n*16));
+  }
 #endif
     addptr(rsp, 16*num_xmm_regs);
 
 #ifdef COMPILER2
     if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registes.
-      off = 0;
+      // Restore upper half of YMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
+        vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
       }
       addptr(rsp, 16*num_xmm_regs);
       if(UseAVX > 2) {
-        off = 0;
         for (int n = 0; n < num_xmm_regs; n++) {
-          vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
+          vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
         }
         addptr(rsp, 32*num_xmm_regs);
       }
@@ -6312,7 +7387,8 @@
                                       XMMRegister vec, Register tmp,
                                       int ae) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
 
   // This method uses the pcmpestri instruction with bound registers
@@ -6490,7 +7566,8 @@
                                     XMMRegister vec, Register tmp,
                                     int ae) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
 
   //
@@ -6807,7 +7884,8 @@
 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
   ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+  assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
 
   int stride = 8;
 
@@ -6831,7 +7909,7 @@
 
     bind(SCAN_TO_16_CHAR_LOOP);
     vmovdqu(vec3, Address(result, 0));
-    vpcmpeqw(vec3, vec3, vec1, true);
+    vpcmpeqw(vec3, vec3, vec1, 1);
     vptest(vec2, vec3);
     jcc(Assembler::carryClear, FOUND_CHAR);
     addptr(result, 32);
@@ -6844,36 +7922,32 @@
     pshufd(vec1, vec1, 0);
     pxor(vec2, vec2);
   }
-  if (UseAVX >= 2 || UseSSE42Intrinsics) {
-    bind(SCAN_TO_8_CHAR);
-    cmpl(cnt1, stride);
-    if (UseAVX >= 2) {
-      jccb(Assembler::less, SCAN_TO_CHAR);
-    }
-    if (!(UseAVX >= 2)) {
-      jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
-      movdl(vec1, ch);
-      pshuflw(vec1, vec1, 0x00);
-      pshufd(vec1, vec1, 0);
-      pxor(vec2, vec2);
-    }
-    movl(tmp, cnt1);
-    andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
-    andl(cnt1,0x00000007);  //tail count (in chars)
-
-    bind(SCAN_TO_8_CHAR_LOOP);
-    movdqu(vec3, Address(result, 0));
-    pcmpeqw(vec3, vec1);
-    ptest(vec2, vec3);
-    jcc(Assembler::carryClear, FOUND_CHAR);
-    addptr(result, 16);
-    subl(tmp, stride);
-    jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
-  }
+  bind(SCAN_TO_8_CHAR);
+  cmpl(cnt1, stride);
+  if (UseAVX >= 2) {
+    jccb(Assembler::less, SCAN_TO_CHAR);
+  } else {
+    jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
+    movdl(vec1, ch);
+    pshuflw(vec1, vec1, 0x00);
+    pshufd(vec1, vec1, 0);
+    pxor(vec2, vec2);
+  }
+  movl(tmp, cnt1);
+  andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
+  andl(cnt1,0x00000007);  //tail count (in chars)
+
+  bind(SCAN_TO_8_CHAR_LOOP);
+  movdqu(vec3, Address(result, 0));
+  pcmpeqw(vec3, vec1);
+  ptest(vec2, vec3);
+  jcc(Assembler::carryClear, FOUND_CHAR);
+  addptr(result, 16);
+  subl(tmp, stride);
+  jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
   bind(SCAN_TO_CHAR);
   testl(cnt1, cnt1);
   jcc(Assembler::zero, RET_NOT_FOUND);
-
   bind(SCAN_TO_CHAR_LOOP);
   load_unsigned_short(tmp, Address(result, 0));
   cmpl(ch, tmp);
@@ -6887,16 +7961,14 @@
   movl(result, -1);
   jmpb(DONE_LABEL);
 
-  if (UseAVX >= 2 || UseSSE42Intrinsics) {
-    bind(FOUND_CHAR);
-    if (UseAVX >= 2) {
-      vpmovmskb(tmp, vec3);
-    } else {
-      pmovmskb(tmp, vec3);
-    }
-    bsfl(ch, tmp);
-    addl(result, ch);
-  }
+  bind(FOUND_CHAR);
+  if (UseAVX >= 2) {
+    vpmovmskb(tmp, vec3);
+  } else {
+    pmovmskb(tmp, vec3);
+  }
+  bsfl(ch, tmp);
+  addl(result, ch);
 
   bind(FOUND_SEQ_CHAR);
   subptr(result, str1);
@@ -6985,6 +8057,7 @@
   }
 
   if (UseAVX >= 2 && UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
     Label COMPARE_TAIL_LONG;
@@ -7059,7 +8132,7 @@
       vmovdqu(vec1, Address(str1, result, scale));
       vpxor(vec1, Address(str2, result, scale));
     } else {
-      vpmovzxbw(vec1, Address(str1, result, scale1));
+      vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
       vpxor(vec1, Address(str2, result, scale2));
     }
     vptest(vec1, vec1);
@@ -7120,6 +8193,7 @@
 
     bind(COMPARE_SMALL_STR);
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
     int pcmpmask = 0x19;
     // Setup to compare 8-char (16-byte) vectors,
@@ -7252,7 +8326,7 @@
 
   movl(result, len); // copy
 
-  if (UseAVX >= 2) {
+  if (UseAVX >= 2 && UseSSE >= 2) {
     // With AVX2, use 32-byte vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
@@ -7287,6 +8361,7 @@
     movl(len, result);
     // Fallthru to tail compare
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be  for SSE4.2 intrinsics to be available");
     // With SSE4.2, use double quad vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
@@ -7363,7 +8438,7 @@
 
   // That's it
   bind(DONE);
-  if (UseAVX >= 2) {
+  if (UseAVX >= 2 && UseSSE >= 2) {
     // clean upper bits of YMM registers
     vpxor(vec1, vec1);
     vpxor(vec2, vec2);
@@ -7451,6 +8526,7 @@
     movl(limit, result);
     // Fallthru to tail compare
   } else if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     // With SSE4.2, use double quad vector compare
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
@@ -7672,7 +8748,7 @@
         BIND(L_check_fill_32_bytes);
         addl(count, 8 << shift);
         jccb(Assembler::less, L_check_fill_8_bytes);
-        evmovdqul(Address(to, 0), xtmp, Assembler::AVX_256bit);
+        vmovdqu(Address(to, 0), xtmp);
         addptr(to, 32);
         subl(count, 8 << shift);
 
@@ -7800,6 +8876,7 @@
   negptr(len);
 
   if (UseSSE42Intrinsics || UseAVX >= 2) {
+    assert(UseSSE42Intrinsics ? UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
     Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
 
@@ -9572,6 +10649,7 @@
   push(len);
 
   if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label copy_32_loop, copy_16, copy_tail;
 
     movl(result, len);
@@ -9671,6 +10749,7 @@
   assert_different_registers(src, dst, len, tmp2);
 
   if (UseSSE42Intrinsics) {
+    assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label copy_8_loop, copy_bytes, copy_tail;
 
     movl(tmp2, len);

--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -962,10 +962,15 @@
   void divss(XMMRegister dst, AddressLiteral src);
 
   // Move Unaligned Double Quadword
-  void movdqu(Address     dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, Address src)       { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(Address     dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
   void movdqu(XMMRegister dst, AddressLiteral src);
+  // AVX Unaligned forms
+  void vmovdqu(Address     dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, AddressLiteral src);
 
   // Move Aligned Double Quadword
   void movdqa(XMMRegister dst, Address src)       { Assembler::movdqa(dst, src); }
@@ -999,6 +1004,19 @@
     Assembler::pclmulqdq(dst, src, 0x11);
   }
 
+  void pcmpeqb(XMMRegister dst, XMMRegister src);
+  void pcmpeqw(XMMRegister dst, XMMRegister src);
+
+  void pcmpestri(XMMRegister dst, Address src, int imm8);
+  void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
+
+  void pmovzxbw(XMMRegister dst, XMMRegister src);
+  void pmovzxbw(XMMRegister dst, Address src);
+
+  void pmovmskb(Register dst, XMMRegister src);
+
+  void ptest(XMMRegister dst, XMMRegister src);
+
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, AddressLiteral src);
@@ -1024,12 +1042,12 @@
   void ucomisd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
+  void xorpd(XMMRegister dst, XMMRegister src);
   void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
   void xorpd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
+  void xorps(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
@@ -1047,6 +1065,49 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpbroadcastw(XMMRegister dst, XMMRegister src);
+
+  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
+  void vpmovmskb(Register dst, XMMRegister src);
+
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vptest(XMMRegister dst, XMMRegister src);
+
+  void punpcklbw(XMMRegister dst, XMMRegister src);
+  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
+
+  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
+  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
+
   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len)     { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);

--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -192,31 +192,22 @@
     }
   } else if(UseSSE >= 2) {
     // Save whole 128bit (16 bytes) XMM regiters
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
-        off += delta;
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
-        off += delta;
-      }
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
+      off += delta;
     }
   }
 
-  if (vect_words > 0) {
+  if (save_vectors) {
     assert(vect_words*wordSize == 128, "");
     __ subptr(rsp, 128); // Save upper half of YMM registes
-    off = 0;
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+      __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
       __ subptr(rsp, 256); // Save upper half of ZMM registes
-      off = 0;
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
+        __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
       }
     }
   }
@@ -275,44 +266,39 @@
 #else
   assert(!restore_vectors, "vectors are generated only by C2");
 #endif
+
+  if (restore_vectors) {
+    assert(additional_frame_bytes == 128, "");
+    if (UseAVX > 2) {
+      // Restore upper half of ZMM registers.
+      for (int n = 0; n < num_xmm_regs; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
+      }
+      __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
+    }
+    // Restore upper half of YMM registes.
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
+    }
+    __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
+  }
+
   int off = xmm0_off;
   int delta = xmm1_off - off;
 
   if (UseSSE == 1) {
-    assert(additional_frame_bytes == 0, "");
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize));
       off += delta;
     }
   } else if (UseSSE >= 2) {
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
-        off += delta;
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
-        off += delta;
-      }
+    // additional_frame_bytes only populated for the restore_vector case, else it is 0
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
+      off += delta;
     }
   }
-  if (restore_vectors) {
-    if (UseAVX > 2) {
-      off = 0;
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
-      }
-      __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
-    }
-    // Restore upper half of YMM registes.
-    assert(additional_frame_bytes == 128, "");
-    off = 0;
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
-    }
-    __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
-  }
+
   __ pop_FPU_state();
   __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
 
@@ -2562,7 +2548,8 @@
 
   oop_maps->add_gc_map( __ pc()-start, map);
 
-  // Discard arg to fetch_unroll_info
+  // Discard args to fetch_unroll_info
+  __ pop(rcx);
   __ pop(rcx);
 
   __ get_thread(rcx);
@@ -2575,9 +2562,8 @@
   // we are very short of registers
 
   Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
-  // retrieve the deopt kind from where we left it.
-  __ pop(rax);
-  __ movl(unpack_kind, rax);                      // save the unpack_kind value
+  // retrieve the deopt kind from the UnrollBlock.
+  __ movl(rax, unpack_kind);
 
    Label noException;
   __ cmpl(rax, Deoptimization::Unpack_exception);   // Was exception pending?
@@ -2787,11 +2773,12 @@
   enum frame_layout {
     arg0_off,      // thread                     sp + 0 // Arg location for
     arg1_off,      // unloaded_class_index       sp + 1 // calling C
+    arg2_off,      // exec_mode                  sp + 2
     // The frame sender code expects that rbp will be in the "natural" place and
     // will override any oopMap setting for it. We must therefore force the layout
     // so that it agrees with the frame sender code.
-    rbp_off,       // callee saved register      sp + 2
-    return_off,    // slot for return address    sp + 3
+    rbp_off,       // callee saved register      sp + 3
+    return_off,    // slot for return address    sp + 4
     framesize
   };
 
@@ -2823,6 +2810,7 @@
   __ movptr(Address(rsp, arg0_off*wordSize), rdx);
   // argument already in ECX
   __ movl(Address(rsp, arg1_off*wordSize),rcx);
+  __ movl(Address(rsp, arg2_off*wordSize), Deoptimization::Unpack_uncommon_trap);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
 
   // Set an oopmap for the call site
@@ -2839,6 +2827,16 @@
   // Load UnrollBlock into EDI
   __ movptr(rdi, rax);
 
+#ifdef ASSERT
+  { Label L;
+    __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()),
+            (int32_t)Deoptimization::Unpack_uncommon_trap);
+    __ jcc(Assembler::equal, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)

--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -72,45 +72,28 @@
 class RegisterSaver {
   // Capture info about frame layout.  Layout offsets are in jint
   // units because compiler frame slots are jints.
-#define HALF_ZMM_BANK_WORDS 128
+#define XSAVE_AREA_BEGIN 160
+#define XSAVE_AREA_YMM_BEGIN 576
+#define XSAVE_AREA_ZMM_BEGIN 1152
+#define XSAVE_AREA_UPPERBANK 1664
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+    xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,            // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
-    DEF_XMM_OFFS(2),
-    DEF_XMM_OFFS(3),
-    DEF_XMM_OFFS(4),
-    DEF_XMM_OFFS(5),
-    DEF_XMM_OFFS(6),
-    DEF_XMM_OFFS(7),
-    DEF_XMM_OFFS(8),
-    DEF_XMM_OFFS(9),
-    DEF_XMM_OFFS(10),
-    DEF_XMM_OFFS(11),
-    DEF_XMM_OFFS(12),
-    DEF_XMM_OFFS(13),
-    DEF_XMM_OFFS(14),
-    DEF_XMM_OFFS(15),
-    zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
+    // 2..15 are implied in range usage
+    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    DEF_YMM_OFFS(0),
+    DEF_YMM_OFFS(1),
+    // 2..15 are implied in range usage
+    zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
     DEF_ZMM_OFFS(16),
     DEF_ZMM_OFFS(17),
-    DEF_ZMM_OFFS(18),
-    DEF_ZMM_OFFS(19),
-    DEF_ZMM_OFFS(20),
-    DEF_ZMM_OFFS(21),
-    DEF_ZMM_OFFS(22),
-    DEF_ZMM_OFFS(23),
-    DEF_ZMM_OFFS(24),
-    DEF_ZMM_OFFS(25),
-    DEF_ZMM_OFFS(26),
-    DEF_ZMM_OFFS(27),
-    DEF_ZMM_OFFS(28),
-    DEF_ZMM_OFFS(29),
-    DEF_ZMM_OFFS(30),
-    DEF_ZMM_OFFS(31),
+    // 18..31 are implied in range usage
     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
@@ -160,8 +143,6 @@
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-  int ymmhi_offset = -1;
   int off = 0;
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
   if (UseAVX < 3) {
@@ -171,24 +152,15 @@
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    // Save upper half of YMM registers
-    vect_words = 16 * num_xmm_regs / wordSize;
-    if (UseAVX < 3) {
-      ymmhi_offset = additional_frame_words;
-      additional_frame_words += vect_words;
-    }
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
 
-  // Always make the frame size 16-byte aligned
-  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
-                                     reg_save_size*BytesPerInt, num_xmm_regs);
+  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
+  int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
-  // The caller will allocate additional_frame_words
-  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
   // CodeBlob frame size is in words.
   int frame_size_in_words = frame_size_in_bytes / wordSize;
   *total_frame_words = frame_size_in_words;
@@ -203,12 +175,34 @@
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
   // push cpu state handles this on EVEX enabled targets
-  if ((vect_words > 0) && (UseAVX < 3)) {
-    assert(vect_words*wordSize >= 256, "");
-    // Save upper half of YMM registes(0..num_xmm_regs)
-    __ subptr(rsp, num_xmm_regs*16);
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+  if (save_vectors) {
+    // Save upper half of YMM registes(0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
+    }
+    if (VM_Version::supports_evex()) {
+      // Save upper half of ZMM registes(0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
+      }
+      // Save full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      int vector_len = Assembler::AVX_512bit;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Save upper bank of ZMM registers(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
+      }
     }
   }
   if (frame::arg_reg_save_area_bytes != 0) {
@@ -224,8 +218,7 @@
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
 
-#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
-#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
 
   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
@@ -257,31 +250,21 @@
     off = zmm16_off;
     delta = zmm17_off - off;
     for (int n = 16; n < num_xmm_regs; n++) {
-      XMMRegister xmm_name = as_XMMRegister(n);
-      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
+      XMMRegister zmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
       off += delta;
     }
   }
 
 #if defined(COMPILER2) || INCLUDE_JVMCI
   if (save_vectors) {
-    assert(ymmhi_offset != -1, "save area must exist");
-    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
+    off = ymm0_off;
+    int delta = ymm1_off - off;
+    for (int n = 0; n < 16; n++) {
+      XMMRegister ymm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
+      off += delta;
+    }
   }
 #endif // COMPILER2 || INCLUDE_JVMCI
 
@@ -316,8 +299,8 @@
       off = zmm16H_off;
       delta = zmm17H_off - off;
       for (int n = 16; n < num_xmm_regs; n++) {
-        XMMRegister xmm_name = as_XMMRegister(n);
-        map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
+        XMMRegister zmm_name = as_XMMRegister(n);
+        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
         off += delta;
       }
     }
@@ -335,21 +318,48 @@
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
+
 #if defined(COMPILER2) || INCLUDE_JVMCI
-  // On EVEX enabled targets everything is handled in pop fpu state
-  if ((restore_vectors) && (UseAVX < 3)) {
-    assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
-    int off = 0;
-    // Restore upper half of YMM registes (0..num_xmm_regs)
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
-    }
-    __ addptr(rsp, num_xmm_regs*16);
+  if (restore_vectors) {
+    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
+    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
   }
 #else
-  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
+  assert(!save_vectors, "vectors are generated only by C2");
 #endif
+
+  // On EVEX enabled targets everything is handled in pop fpu state
+  if (restore_vectors) {
+    // Restore upper half of YMM registes (0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  base_addr+n*16));
+    }
+    if (VM_Version::supports_evex()) {
+      // Restore upper half of ZMM registes (0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
+      }
+      // Restore full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int vector_len = Assembler::AVX_512bit;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Restore upper bank of ZMM registes(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
+      }
+    }
+  }
+
   // Recover CPU state
   __ pop_CPU_state();
   // Get the rbp described implicitly by the calling convention (no oopMap)
@@ -2819,6 +2829,7 @@
 
     __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
     __ mov(c_rarg0, r15_thread);
+    __ movl(c_rarg2, r14); // exec mode
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
 
@@ -2905,6 +2916,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, r15_thread);
+  __ movl(c_rarg1, r14); // exec_mode
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
 
   // Need to have an oopmap that tells fetch_unroll_info where to
@@ -2922,6 +2934,7 @@
   // Load UnrollBlock* into rdi
   __ mov(rdi, rax);
 
+  __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    Label noException;
   __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
   __ jcc(Assembler::notEqual, noException);
@@ -3140,6 +3153,7 @@
   // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
 
   __ mov(c_rarg0, r15_thread);
+  __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
 
   // Set an oopmap for the call site
@@ -3155,6 +3169,16 @@
   // Load UnrollBlock* into rdi
   __ mov(rdi, rax);
 
+#ifdef ASSERT
+  { Label L;
+    __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()),
+            (int32_t)Deoptimization::Unpack_uncommon_trap);
+    __ jcc(Assembler::equal, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)

--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -273,7 +273,7 @@
     if (UseAVX > 2) {
       last_reg = 31;
     }
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
       }
@@ -391,7 +391,7 @@
     // restore regs belonging to calling function
 #ifdef _WIN64
     // emit the restores for xmm regs
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
       }
@@ -1439,8 +1439,8 @@
       // Copy 64-bytes per iteration
       __ BIND(L_loop);
       if (UseAVX > 2) {
-        __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
-        __ evmovdqul(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
+        __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
+        __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
       } else if (UseAVX == 2) {
         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);

--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -632,12 +632,36 @@
   // Use AES instructions if available.
   if (supports_aes()) {
     if (FLAG_IS_DEFAULT(UseAES)) {
-      UseAES = true;
+      FLAG_SET_DEFAULT(UseAES, true);
     }
-  } else if (UseAES) {
-    if (!FLAG_IS_DEFAULT(UseAES))
+    if (!UseAES) {
+      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+      }
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    } else {
+      if (UseSSE > 2) {
+        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+        }
+      } else {
+        // The AES intrinsic stubs require AES instruction support (of course)
+        // but also require sse3 mode or higher for instructions it use.
+        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
+        }
+        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+      }
+    }
+  } else if (UseAES || UseAESIntrinsics) {
+    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
       warning("AES instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAES, false);
+      FLAG_SET_DEFAULT(UseAES, false);
+    }
+    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      warning("AES intrinsics are not available on this CPU");
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    }
   }
 
   // Use CLMUL instructions if available.
@@ -673,18 +697,6 @@
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
-  // The AES intrinsic stubs require AES instruction support (of course)
-  // but also require sse3 mode for instructions it use.
-  if (UseAES && (UseSSE > 2)) {
-    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-      UseAESIntrinsics = true;
-    }
-  } else if (UseAESIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-  }
-
   // GHASH/GCM intrinsics
   if (UseCLMUL && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
@@ -891,7 +903,7 @@
       UseNewLongLShift = true;
     }
     if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      if( supports_sse4a() ) {
+      if (supports_sse4a()) {
         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
       } else {
         UseXmmLoadAndClearUpper = false;
@@ -918,10 +930,15 @@
         UseXmmI2D = false;
       }
     }
-    if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) {
-      if( supports_sse4_2() && UseSSE >= 4 ) {
-        UseSSE42Intrinsics = true;
+    if (supports_sse4_2() && UseSSE >= 4) {
+      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
       }
+    } else {
+      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+      }
+      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
     }
 
     // some defaults for AMD family 15h
@@ -995,8 +1012,13 @@
       }
       if (supports_sse4_2() && UseSSE >= 4) {
         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
-          UseSSE42Intrinsics = true;
+          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
         }
+      } else {
+        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+        }
+        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
       }
     }
     if ((cpu_family() == 0x06) &&

--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -552,6 +552,19 @@
           break;
         }
       }
+      // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
+      if (retVal == false) {
+        // Verify that OS save/restore all bits of EVEX registers
+        // during signal processing.
+        int nreg = 2 LP64_ONLY(+2);
+        retVal = true;
+        for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
+          if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
+            retVal = false;
+            break;
+          }
+        }
+      }
     }
     return retVal;
   }
@@ -706,6 +719,9 @@
   static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
   static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
+  static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
+  static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
+  static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }

--- a/hotspot/src/cpu/x86/vm/x86.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -1716,6 +1716,36 @@
   return ret_value;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  if (ret_value) {
+    switch (opcode) {
+      case Op_AddVB:
+      case Op_SubVB:
+        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
+          ret_value = false;
+        break;
+      case Op_URShiftVS:
+      case Op_RShiftVS:
+      case Op_LShiftVS:
+      case Op_MulVS:
+      case Op_AddVS:
+      case Op_SubVS:
+        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
+          ret_value = false;
+        break;
+      case Op_CMoveVD:
+        if (vlen != 4)
+          ret_value  = false;
+        break;
+    }
+  }
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   int float_pressure_threshold = default_pressure_threshold;
 #ifdef _LP64
@@ -1759,11 +1789,9 @@
     break;
   case T_BYTE:
     if (size < 4) return 0;
-    if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
     break;
   case T_SHORT:
     if (size < 4) return 0;
-    if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
     break;
   default:
     ShouldNotReachHere();
@@ -1967,27 +1995,34 @@
   bool is_single_byte = false;
   int vec_len = 0;
   if ((UseAVX > 2) && (stack_offset != 0)) {
+    int tuple_type = Assembler::EVEX_FVM;
+    int input_size = Assembler::EVEX_32bit;
     switch (ireg) {
-	case Op_VecS:
+    case Op_VecS:
+      tuple_type = Assembler::EVEX_T1S;
+      break;
     case Op_VecD:
+      tuple_type = Assembler::EVEX_T1S;
+      input_size = Assembler::EVEX_64bit;
+      break;
     case Op_VecX:
-	  break;
-	case Op_VecY:
-	  vec_len = 1;
-	  break;
+      break;
+    case Op_VecY:
+      vec_len = 1;
+      break;
     case Op_VecZ:
-	  vec_len = 2;
-	  break;
+      vec_len = 2;
+      break;
     }
-    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
+    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
   }
   int offset_size = 0;
   int size = 5;
   if (UseAVX > 2 ) {
-    if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { 
+    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
       size += 2; // Need an additional two bytes for EVEX encoding
-    } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { 
+    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
     } else {
       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
@@ -2711,7 +2746,21 @@
 %}
 
 instruct absF_reg_reg(regF dst, regF src) %{
-  predicate(UseAVX > 0);
+  predicate(VM_Version::supports_avxonly());
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (AbsF src));
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
@@ -2723,6 +2772,34 @@
   ins_pipe(pipe_slow);
 %}
 
+instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
+  predicate(VM_Version::supports_avx512novl());
+  match(Set dst (AbsF src1));
+  effect(TEMP src2);
+  ins_cost(150);
+  format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#else // _LP64
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+  predicate(UseAVX > 2);
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#endif
+
 instruct absD_reg(regD dst) %{
   predicate((UseSSE>=2) && (UseAVX == 0));
   match(Set dst (AbsD dst));
@@ -2736,7 +2813,22 @@
 %}
 
 instruct absD_reg_reg(regD dst, regD src) %{
-  predicate(UseAVX > 0);
+  predicate(VM_Version::supports_avxonly());
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (AbsD src));
   ins_cost(150);
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
@@ -2749,6 +2841,35 @@
   ins_pipe(pipe_slow);
 %}
 
+instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
+  predicate(VM_Version::supports_avx512novl());
+  match(Set dst (AbsD src1));
+  effect(TEMP src2);
+  ins_cost(150);
+  format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#else // _LP64
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+  predicate(UseAVX > 2);
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#endif
+
 instruct negF_reg(regF dst) %{
   predicate((UseSSE>=1) && (UseAVX == 0));
   match(Set dst (NegF dst));
@@ -4554,7 +4675,7 @@
 %}
 
 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
@@ -4594,37 +4715,37 @@
 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
   predicate(UseSSE > 2 && UseAVX == 0);
   match(Set dst (AddReductionVI src1 src2));
-  effect(TEMP tmp2, TEMP tmp);
-  format %{ "movdqu  $tmp2,$src2\n\t"
-            "phaddd  $tmp2,$tmp2\n\t"
-            "phaddd  $tmp2,$tmp2\n\t"
-            "movd    $tmp,$src1\n\t"
-            "paddd   $tmp,$tmp2\n\t"
-            "movd    $dst,$tmp\t! add reduction4I" %}
-  ins_encode %{
-    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
-    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
-    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdl($tmp$$XMMRegister, $src1$$Register);
-    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdl($dst$$Register, $tmp$$XMMRegister);
+  effect(TEMP tmp, TEMP tmp2);
+  format %{ "movdqu  $tmp,$src2\n\t"
+            "phaddd  $tmp,$tmp\n\t"
+            "phaddd  $tmp,$tmp\n\t"
+            "movd    $tmp2,$src1\n\t"
+            "paddd   $tmp2,$tmp\n\t"
+            "movd    $dst,$tmp2\t! add reduction4I" %}
+  ins_encode %{
+    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
+    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
+    __ movdl($tmp2$$XMMRegister, $src1$$Register);
+    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ movdl($dst$$Register, $tmp2$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
-            "vphaddd  $tmp,$tmp,$tmp2\n\t"
+            "vphaddd  $tmp,$tmp,$tmp\n\t"
             "movd     $tmp2,$src1\n\t"
             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
             "movd     $dst,$tmp2\t! add reduction4I" %}
   ins_encode %{
     int vector_len = 0;
     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
-    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
     __ movdl($tmp2$$XMMRegister, $src1$$Register);
     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
     __ movdl($dst$$Register, $tmp2$$XMMRegister);
@@ -4657,7 +4778,7 @@
 %}
 
 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
@@ -4712,7 +4833,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2\n\t"
+  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
             "vpaddd  $tmp3,$tmp3,$src2\n\t"
             "vextracti128   $tmp,$tmp3\n\t"
             "vpaddd  $tmp,$tmp,$tmp3\n\t"
@@ -4724,7 +4845,7 @@
             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
             "movd    $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
@@ -4763,7 +4884,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
+  format %{ "vextracti128  $tmp,$src2\n\t"
             "vpaddq  $tmp2,$tmp,$src2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4771,7 +4892,7 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction4L" %}
   ins_encode %{
-    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4786,7 +4907,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2\n\t"
+  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
             "vpaddq  $tmp2,$tmp2,$src2\n\t"
             "vextracti128   $tmp,$tmp2\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4796,7 +4917,7 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4810,290 +4931,280 @@
 %}
 #endif
 
-instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\t! add reduction2F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp2, TEMP tmp);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "addss   $dst,$tmp\t! add reduction4F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vaddss  $dst,dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
+  format %{ "addsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
             "addsd   $dst,$tmp\t! add reduction2D" %}
   ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -5216,7 +5327,7 @@
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2\n\t"
+  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
             "vpmulld  $tmp3,$tmp3,$src2\n\t"
             "vextracti128   $tmp,$tmp3\n\t"
             "vpmulld  $tmp,$tmp,$src2\n\t"
@@ -5228,7 +5339,7 @@
             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
             "movd     $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
@@ -5267,7 +5378,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
+  format %{ "vextracti128  $tmp,$src2\n\t"
             "vpmullq  $tmp2,$tmp,$src2\n\t"
             "pshufd   $tmp,$tmp2,0xE\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5275,7 +5386,7 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction4L" %}
   ins_encode %{
-    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5290,7 +5401,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2\n\t"
+  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
             "vpmullq  $tmp2,$tmp2,$src2\n\t"
             "vextracti128   $tmp,$tmp2\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5300,7 +5411,7 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5314,290 +5425,280 @@
 %}
 #endif
 
-instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\t! mul reduction2F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "mulss   $dst,$tmp\t! mul reduction4F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x1\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x3\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
             "mulsd   $dst,$tmp\t! mul reduction2D" %}
   ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -5608,7 +5709,7 @@
 
 // Bytes vector add
 instruct vadd4B(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed4B" %}
   ins_encode %{
@@ -5617,8 +5718,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
   ins_encode %{
@@ -5628,8 +5740,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
   ins_encode %{
@@ -5639,8 +5763,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd8B(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed8B" %}
   ins_encode %{
@@ -5649,8 +5796,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
   ins_encode %{
@@ -5660,8 +5818,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
   ins_encode %{
@@ -5671,8 +5841,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd16B(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed16B" %}
   ins_encode %{
@@ -5681,8 +5874,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
   ins_encode %{
@@ -5692,8 +5896,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
   ins_encode %{
@@ -5703,8 +5930,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
   ins_encode %{
@@ -5714,8 +5964,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
   ins_encode %{
@@ -5725,8 +5987,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
   ins_encode %{
@@ -5737,7 +6022,7 @@
 %}
 
 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
   ins_encode %{
@@ -5749,7 +6034,7 @@
 
 // Shorts/Chars vector add
 instruct vadd2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed2S" %}
   ins_encode %{
@@ -5758,8 +6043,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
   ins_encode %{
@@ -5769,8 +6065,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
   ins_encode %{
@@ -5780,8 +6088,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed4S" %}
   ins_encode %{
@@ -5790,8 +6121,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
   ins_encode %{
@@ -5801,8 +6143,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
   ins_encode %{
@@ -5812,8 +6166,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed8S" %}
   ins_encode %{
@@ -5822,8 +6199,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
   ins_encode %{
@@ -5833,8 +6221,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
   ins_encode %{
@@ -5844,8 +6255,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
   ins_encode %{
@@ -5855,8 +6289,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
   ins_encode %{
@@ -5866,8 +6312,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
   ins_encode %{
@@ -5878,7 +6347,7 @@
 %}
 
 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
   ins_encode %{
@@ -6264,7 +6733,7 @@
 
 // Bytes vector sub
 instruct vsub4B(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed4B" %}
   ins_encode %{
@@ -6273,8 +6742,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
   ins_encode %{
@@ -6284,8 +6764,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
   ins_encode %{
@@ -6295,8 +6787,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub8B(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed8B" %}
   ins_encode %{
@@ -6305,8 +6820,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
   ins_encode %{
@@ -6316,8 +6842,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
   ins_encode %{
@@ -6327,8 +6865,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub16B(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed16B" %}
   ins_encode %{
@@ -6337,8 +6898,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
   ins_encode %{
@@ -6348,8 +6920,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
   ins_encode %{
@@ -6359,8 +6954,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
   ins_encode %{
@@ -6370,8 +6988,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
   ins_encode %{
@@ -6381,8 +7011,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
   ins_encode %{
@@ -6393,7 +7046,7 @@
 %}
 
 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
   ins_encode %{
@@ -6405,7 +7058,7 @@
 
 // Shorts/Chars vector sub
 instruct vsub2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed2S" %}
   ins_encode %{
@@ -6414,8 +7067,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
   ins_encode %{
@@ -6425,8 +7089,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
   ins_encode %{
@@ -6436,8 +7112,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed4S" %}
   ins_encode %{
@@ -6446,8 +7145,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
   ins_encode %{
@@ -6457,8 +7167,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
   ins_encode %{
@@ -6468,8 +7190,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed8S" %}
   ins_encode %{
@@ -6478,8 +7223,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
   ins_encode %{
@@ -6489,8 +7245,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
   ins_encode %{
@@ -6500,8 +7279,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
   ins_encode %{
@@ -6511,8 +7313,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
   ins_encode %{
@@ -6522,8 +7336,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS dst (LoadVector mem)));
+   effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
   ins_encode %{
@@ -6534,7 +7371,7 @@
 %}
 
 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
   ins_encode %{
@@ -6920,7 +7757,7 @@
 
 // Shorts/Chars vector mul
 instruct vmul2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (MulVS dst src));
   format %{ "pmullw $dst,$src\t! mul packed2S" %}
   ins_encode %{
@@ -6929,8 +7766,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
   ins_encode %{
@@ -6940,8 +7788,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
   ins_encode %{
@@ -6951,8 +7811,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (MulVS dst src));
   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
   ins_encode %{
@@ -6961,8 +7844,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
   ins_encode %{
@@ -6972,8 +7866,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
   ins_encode %{
@@ -6983,8 +7889,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (MulVS dst src));
   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
   ins_encode %{
@@ -6993,8 +7922,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
   ins_encode %{
@@ -7004,8 +7944,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
   ins_encode %{
@@ -7015,8 +7978,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
   ins_encode %{
@@ -7026,8 +8012,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
   ins_encode %{
@@ -7037,8 +8035,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
   ins_encode %{
@@ -7049,7 +8070,7 @@
 %}
 
 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
   ins_encode %{
@@ -7711,7 +8732,7 @@
 
 // Shorts/Chars vector left shift
 instruct vsll2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7721,7 +8742,7 @@
 %}
 
 instruct vsll2S_imm(vecS dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7730,8 +8751,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7741,8 +8773,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7752,8 +8796,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7763,7 +8830,7 @@
 %}
 
 instruct vsll4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7772,8 +8839,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7783,8 +8861,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7794,8 +8884,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7805,7 +8918,7 @@
 %}
 
 instruct vsll8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7814,8 +8927,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7825,8 +8949,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7836,8 +8983,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
   ins_encode %{
@@ -7847,8 +9017,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
   ins_encode %{
@@ -7858,8 +9040,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
   ins_encode %{
@@ -7870,7 +9075,7 @@
 %}
 
 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
   ins_encode %{
@@ -8104,7 +9309,7 @@
 // unsigned values.
 
 instruct vsrl2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8114,7 +9319,7 @@
 %}
 
 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8123,8 +9328,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8134,8 +9350,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8145,8 +9373,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8156,7 +9407,7 @@
 %}
 
 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8165,8 +9416,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8176,8 +9438,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8187,8 +9461,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8198,7 +9495,7 @@
 %}
 
 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8207,8 +9504,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8218,8 +9526,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8229,8 +9560,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
   ins_encode %{
@@ -8240,8 +9594,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
   ins_encode %{
@@ -8251,8 +9617,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
   ins_encode %{
@@ -8263,7 +9652,7 @@
 %}
 
 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
   ins_encode %{
@@ -8493,7 +9882,7 @@
 
 // Shorts/Chars vector arithmetic right shift
 instruct vsra2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8512,8 +9901,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8523,8 +9923,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8534,8 +9946,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8545,7 +9980,7 @@
 %}
 
 instruct vsra4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8554,8 +9989,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8565,8 +10011,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8576,8 +10034,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8587,7 +10068,7 @@
 %}
 
 instruct vsra8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8596,8 +10077,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8607,8 +10099,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8618,8 +10133,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
   ins_encode %{
@@ -8629,8 +10167,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
   ins_encode %{
@@ -8640,8 +10190,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
   ins_encode %{
@@ -8652,7 +10225,7 @@
 %}
 
 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
   ins_encode %{

--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -291,9 +291,7 @@
     size += 6; // fldcw
   }
   if (C->max_vector_size() > 16) {
-    if(UseAVX <= 2) {
-      size += 3; // vzeroupper
-    }
+    size += 3; // vzeroupper
   }
   return size;
 }
@@ -1915,7 +1913,7 @@
       if (stub == NULL) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
-      } 
+      }
     }
   %}

--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Tue Nov 24 10:30:23 2015 +0100
@@ -536,11 +536,7 @@
 #define __ _masm.
 
 static int clear_avx_size() {
-  if(UseAVX > 2) {
-    return 0; // vzeroupper is ignored
-  } else {
-    return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
-  }
+  return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -871,7 +867,7 @@
       if (framesize > 0) {
         st->print("\n\t");
         st->print("addq    rbp, #%d", framesize);
-      }      
+      }
     }
   }

--- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -497,12 +497,15 @@
   //  1:  getfield
   //  2:    index
   //  3:    index
-  //  4:  ireturn/areturn
+  //  4:  ireturn/areturn/freturn/lreturn/dreturn
   // NB this is not raw bytecode: index is in machine order
   u1 *code = method->code_base();
   assert(code[0] == Bytecodes::_aload_0 &&
          code[1] == Bytecodes::_getfield &&
          (code[4] == Bytecodes::_ireturn ||
+          code[4] == Bytecodes::_freturn ||
+          code[4] == Bytecodes::_lreturn ||
+          code[4] == Bytecodes::_dreturn ||
           code[4] == Bytecodes::_areturn), "should do");
   u2 index = Bytes::get_native_u2(&code[2]);

--- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Tue Nov 24 10:30:23 2015 +0100
@@ -32,6 +32,7 @@
 import jdk.vm.ci.code.InstalledCode;
 import jdk.vm.ci.code.InvalidInstalledCodeException;
 import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.common.JVMCIError;
 import jdk.vm.ci.hotspotvmconfig.HotSpotVMField;
 import jdk.vm.ci.inittimer.InitTimer;
 import jdk.vm.ci.meta.JavaType;
@@ -308,6 +309,8 @@
      *         {@link HotSpotVMConfig#codeInstallResultCodeTooLarge},
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesFailed} or
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesInvalid}.
+     * @throws JVMCIError if there is something wrong with the compiled code or the associated
+     *             metadata.
      */
     native int installCode(TargetDescription target, HotSpotCompiledCode compiledCode, InstalledCode code, HotSpotSpeculationLog speculationLog);

--- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Tue Nov 24 10:30:23 2015 +0100
@@ -1680,6 +1680,7 @@
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_caller_adjustment", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockCallerAdjustmentOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_number_of_frames", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockNumberOfFramesOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_total_frame_sizes", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockTotalFrameSizesOffset;
+    @HotSpotVMField(name = "Deoptimization::UnrollBlock::_unpack_kind", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockUnpackKindOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_sizes", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFrameSizesOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_pcs", type = "address*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFramePcsOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_initial_info", type = "intptr_t", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockInitialInfoOffset;

--- a/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -66,12 +66,12 @@
   features = generic_v9_m;
 
   if (detect_niagara()) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on Niagara");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on Niagara"); }
     features = niagara1_m | T_family_m;
   }
 
   if (detect_M_family()) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on M family");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on M family"); }
     features = sun4v_m | generic_v9_m | M_family_m | T_family_m;
   }

--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -707,12 +707,10 @@
     BlockBegin* block = bci2block()->at(bci);
     if (block != NULL && block == parent()->bci2block()->at(bci)) {
       BlockBegin* new_block = new BlockBegin(block->bci());
-#ifndef PRODUCT
       if (PrintInitialBlockList) {
         tty->print_cr("CFG: cloned block %d (bci %d) as block %d for jsr",
                       block->block_id(), block->bci(), new_block->block_id());
       }
-#endif
       // copy data from cloned blocked
       new_block->set_depth_first_number(block->depth_first_number());
       if (block->is_set(BlockBegin::parser_loop_header_flag)) new_block->set(BlockBegin::parser_loop_header_flag);
@@ -1438,7 +1436,9 @@
 
   bool need_mem_bar = false;
   if (method()->name() == ciSymbol::object_initializer_name() &&
-      (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields()))) {
+      (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields())
+                              || (support_IRIW_for_not_multiple_copy_atomic_cpu && scope()->wrote_volatile())
+     )){
     need_mem_bar = true;
   }
 
@@ -1554,6 +1554,9 @@
 
   if (code == Bytecodes::_putfield) {
     scope()->set_wrote_fields();
+    if (field->is_volatile()) {
+      scope()->set_wrote_volatile();
+    }
   }
 
   const int offset = !needs_patching ? field->offset() : -1;
@@ -3785,12 +3788,10 @@
     cont = new BlockBegin(next_bci());
     // low number so that continuation gets parsed as early as possible
     cont->set_depth_first_number(0);
-#ifndef PRODUCT
     if (PrintInitialBlockList) {
       tty->print_cr("CFG: created block %d (bci %d) as continuation for inline at bci %d",
                     cont->block_id(), cont->bci(), bci());
     }
-#endif
     continuation_existed = false;
   }
   // Record number of predecessors of continuation block before

--- a/hotspot/src/share/vm/c1/c1_IR.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_IR.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -143,6 +143,7 @@
   _monitor_pairing_ok = method->has_balanced_monitors();
   _wrote_final        = false;
   _wrote_fields       = false;
+  _wrote_volatile     = false;
   _start              = NULL;
 
   if (osr_bci == -1) {

--- a/hotspot/src/share/vm/c1/c1_IR.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_IR.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -151,6 +151,7 @@
   bool          _monitor_pairing_ok;             // the monitor pairing info
   bool          _wrote_final;                    // has written final field
   bool          _wrote_fields;                   // has written fields
+  bool          _wrote_volatile;                 // has written volatile field
   BlockBegin*   _start;                          // the start block, successsors are method entries
 
   BitMap        _requires_phi_function;          // bit is set if phi functions at loop headers are necessary for a local variable
@@ -187,7 +188,8 @@
   bool          wrote_final    () const          { return _wrote_final; }
   void          set_wrote_fields()               { _wrote_fields = true; }
   bool          wrote_fields    () const         { return _wrote_fields; }
-
+  void          set_wrote_volatile()             { _wrote_volatile = true; }
+  bool          wrote_volatile    () const       { return _wrote_volatile; }
 };

--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -2004,7 +2004,7 @@
 
 // LIR_Op2
 void LIR_Op2::print_instr(outputStream* out) const {
-  if (code() == lir_cmove) {
+  if (code() == lir_cmove || code() == lir_cmp) {
     print_condition(out, condition());         out->print(" ");
   }
   in_opr1()->print(out);    out->print(" ");

--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1761,7 +1761,7 @@
     post_barrier(object.result(), value.result());
   }
 
-  if (is_volatile && os::is_MP()) {
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) {
     __ membar();
   }
 }
@@ -1822,6 +1822,10 @@
     address = generate_address(object.result(), x->offset(), field_type);
   }
 
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) {
+    __ membar();
+  }
+
   bool needs_atomic_access = is_volatile || AlwaysAtomicAccesses;
   if (needs_atomic_access && !needs_patching) {
     volatile_field_load(address, reg, info);
@@ -2238,6 +2242,10 @@
 
   LIR_Opr value = rlock_result(x, x->basic_type());
 
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) {
+    __ membar();
+  }
+
   get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile());
 
 #if INCLUDE_ALL_GCS
@@ -2395,7 +2403,7 @@
 
   if (x->is_volatile() && os::is_MP()) __ membar_release();
   put_Object_unsafe(src.result(), off.result(), data.result(), type, x->is_volatile());
-  if (x->is_volatile() && os::is_MP()) __ membar();
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) __ membar();
 }
 
 
@@ -2794,7 +2802,7 @@
     assert(obj->is_valid(), "must be valid");
 
     if (method()->is_synchronized() && GenerateSynchronizationCode) {
-      LIR_Opr lock = new_register(T_INT);
+      LIR_Opr lock = syncLockOpr();
       __ load_stack_address_monitor(0, lock);
 
       CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, SynchronizationEntryBCI), NULL, x->check_flag(Instruction::DeoptimizeOnException));
@@ -3421,14 +3429,18 @@
   __ add(result, LIR_OprFact::intConst(InvocationCounter::count_increment), result);
   __ store(result, counter);
   if (notify) {
-    LIR_Opr mask = load_immediate(frequency << InvocationCounter::count_shift, T_INT);
-    LIR_Opr meth = new_register(T_METADATA);
-    __ metadata2reg(method->constant_encoding(), meth);
-    __ logical_and(result, mask, result);
-    __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0));
+    LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding());
     // The bci for info can point to cmp for if's we want the if bci
     CodeStub* overflow = new CounterOverflowStub(info, bci, meth);
-    __ branch(lir_cond_equal, T_INT, overflow);
+    int freq = frequency << InvocationCounter::count_shift;
+    if (freq == 0) {
+      __ branch(lir_cond_always, T_ILLEGAL, overflow);
+    } else {
+      LIR_Opr mask = load_immediate(freq, T_INT);
+      __ logical_and(result, mask, result);
+      __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0));
+      __ branch(lir_cond_equal, T_INT, overflow);
+    }
     __ branch_destination(overflow->continuation());
   }
 }

--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -495,6 +495,7 @@
   static LIR_Opr divOutOpr();
   static LIR_Opr remOutOpr();
   static LIR_Opr shiftCountOpr();
+  LIR_Opr syncLockOpr();
   LIR_Opr syncTempOpr();
   LIR_Opr atomicLockOpr();

--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -6233,9 +6233,19 @@
             if (prev_branch->stub() == NULL) {
 
               LIR_Op2* prev_cmp = NULL;
+              // There might be a cmove inserted for profiling which depends on the same
+              // compare. If we change the condition of the respective compare, we have
+              // to take care of this cmove as well.
+              LIR_Op2* prev_cmove = NULL;
 
               for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) {
                 prev_op = instructions->at(j);
+                // check for the cmove
+                if (prev_op->code() == lir_cmove) {
+                  assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2");
+                  prev_cmove = (LIR_Op2*)prev_op;
+                  assert(prev_branch->cond() == prev_cmove->condition(), "should be the same");
+                }
                 if (prev_op->code() == lir_cmp) {
                   assert(prev_op->as_Op2() != NULL, "branch must be of type LIR_Op2");
                   prev_cmp = (LIR_Op2*)prev_op;
@@ -6252,6 +6262,13 @@
                 prev_branch->negate_cond();
                 prev_cmp->set_condition(prev_branch->cond());
                 instructions->truncate(instructions->length() - 1);
+                // if we do change the condition, we have to change the cmove as well
+                if (prev_cmove != NULL) {
+                  prev_cmove->set_condition(prev_branch->cond());
+                  LIR_Opr t = prev_cmove->in_opr1();
+                  prev_cmove->set_in_opr1(prev_cmove->in_opr2());
+                  prev_cmove->set_in_opr2(t);
+                }
               }
             }
           }

--- a/hotspot/src/share/vm/ci/ciMethod.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/ci/ciMethod.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1262,6 +1262,8 @@
 bool ciMethod::is_vanilla_constructor() const {  FETCH_FLAG_FROM_VM(is_vanilla_constructor); }
 bool ciMethod::has_loops      () const {         FETCH_FLAG_FROM_VM(has_loops); }
 bool ciMethod::has_jsrs       () const {         FETCH_FLAG_FROM_VM(has_jsrs);  }
+bool ciMethod::is_getter      () const {         FETCH_FLAG_FROM_VM(is_getter); }
+bool ciMethod::is_setter      () const {         FETCH_FLAG_FROM_VM(is_setter); }
 bool ciMethod::is_accessor    () const {         FETCH_FLAG_FROM_VM(is_accessor); }
 bool ciMethod::is_initializer () const {         FETCH_FLAG_FROM_VM(is_initializer); }

--- a/hotspot/src/share/vm/ci/ciMethod.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/ci/ciMethod.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -311,6 +311,8 @@
   bool is_final_method() const                   { return is_final() || holder()->is_final(); }
   bool has_loops      () const;
   bool has_jsrs       () const;
+  bool is_getter      () const;
+  bool is_setter      () const;
   bool is_accessor    () const;
   bool is_initializer () const;
   bool can_be_statically_bound() const           { return _can_be_statically_bound; }

--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1588,6 +1588,7 @@
   _exceptions = NULL;
   _exc_klasses = NULL;
   _successors = NULL;
+  _predecessors = new (outer->arena()) GrowableArray<Block*>(outer->arena(), 1, 0, NULL);
   _state = new (outer->arena()) StateVector(outer);
   JsrSet* new_jsrs =
     new (outer->arena()) JsrSet(outer->arena(), jsrs->size());
@@ -1771,6 +1772,12 @@
         break;
       }
     }
+
+    // Set predecessor information
+    for (int i = 0; i < _successors->length(); i++) {
+      Block* block = _successors->at(i);
+      block->predecessors()->append(this);
+    }
   }
   return _successors;
 }
@@ -1813,7 +1820,9 @@
     } else {
       klass = handler->catch_klass();
     }
-    _exceptions->append(analyzer->block_at(bci, _jsrs));
+    Block* block = analyzer->block_at(bci, _jsrs);
+    _exceptions->append(block);
+    block->predecessors()->append(this);
     _exc_klasses->append(klass);
   }
 }
@@ -1909,6 +1918,18 @@
       st->cr();
     }
   }
+  if (_predecessors == NULL) {
+    st->print_cr("  No predecessor information");
+  } else {
+    int num_predecessors = _predecessors->length();
+    st->print_cr("  Predecessors : %d", num_predecessors);
+    for (int i = 0; i < num_predecessors; i++) {
+      Block* predecessor = _predecessors->at(i);
+      st->print("    ");
+      predecessor->print_value_on(st);
+      st->cr();
+    }
+  }
   if (_exceptions == NULL) {
     st->print_cr("  No exception information");
   } else {
@@ -2270,6 +2291,9 @@
   for (SuccIter iter(tail); !iter.done(); iter.next()) {
     if (iter.succ() == head) {
       iter.set_succ(clone);
+      // Update predecessor information
+      head->predecessors()->remove(tail);
+      clone->predecessors()->append(tail);
     }
   }
   flow_block(tail, temp_vector, temp_set);
@@ -2279,6 +2303,9 @@
     for (SuccIter iter(clone); !iter.done(); iter.next()) {
       if (iter.succ() == head) {
         iter.set_succ(clone);
+        // Update predecessor information
+        head->predecessors()->remove(clone);
+        clone->predecessors()->append(clone);
         break;
       }
     }
@@ -2884,6 +2911,69 @@
 }
 
 // ------------------------------------------------------------------
+// ciTypeFlow::is_dominated_by
+//
+// Determine if the instruction at bci is dominated by the instruction at dom_bci.
+bool ciTypeFlow::is_dominated_by(int bci, int dom_bci) {
+  assert(!method()->has_jsrs(), "jsrs are not supported");
+
+  ResourceMark rm;
+  JsrSet* jsrs = new ciTypeFlow::JsrSet(NULL);
+  int        index = _methodBlocks->block_containing(bci)->index();
+  int    dom_index = _methodBlocks->block_containing(dom_bci)->index();
+  Block*     block = get_block_for(index, jsrs, ciTypeFlow::no_create);
+  Block* dom_block = get_block_for(dom_index, jsrs, ciTypeFlow::no_create);
+
+  // Start block dominates all other blocks
+  if (start_block()->rpo() == dom_block->rpo()) {
+    return true;
+  }
+
+  // Dominated[i] is true if block i is dominated by dom_block
+  int num_blocks = _methodBlocks->num_blocks();
+  bool* dominated = NEW_RESOURCE_ARRAY(bool, num_blocks);
+  for (int i = 0; i < num_blocks; ++i) {
+    dominated[i] = true;
+  }
+  dominated[start_block()->rpo()] = false;
+
+  // Iterative dominator algorithm
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    // Use reverse postorder iteration
+    for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
+      if (blk->is_start()) {
+        // Ignore start block
+        continue;
+      }
+      // The block is dominated if it is the dominating block
+      // itself or if all predecessors are dominated.
+      int index = blk->rpo();
+      bool dom = (index == dom_block->rpo());
+      if (!dom) {
+        // Check if all predecessors are dominated
+        dom = true;
+        for (int i = 0; i < blk->predecessors()->length(); ++i) {
+          Block* pred = blk->predecessors()->at(i);
+          if (!dominated[pred->rpo()]) {
+            dom = false;
+            break;
+          }
+        }
+      }
+      // Update dominator information
+      if (dominated[index] != dom) {
+        changed = true;
+        dominated[index] = dom;
+      }
+    }
+  }
+  // block dominated by dom_block?
+  return dominated[block->rpo()];
+}
+
+// ------------------------------------------------------------------
 // ciTypeFlow::record_failure()
 // The ciTypeFlow object keeps track of failure reasons separately from the ciEnv.
 // This is required because there is not a 1-1 relation between the ciEnv and

--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -529,6 +529,7 @@
     GrowableArray<Block*>*           _exceptions;
     GrowableArray<ciInstanceKlass*>* _exc_klasses;
     GrowableArray<Block*>*           _successors;
+    GrowableArray<Block*>*           _predecessors;
     StateVector*                     _state;
     JsrSet*                          _jsrs;
 
@@ -617,6 +618,12 @@
       return _successors;
     }
 
+    // Predecessors of this block (including exception edges)
+    GrowableArray<Block*>* predecessors() {
+      assert(_predecessors != NULL, "must be filled in");
+      return _predecessors;
+    }
+
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -941,6 +948,9 @@
   // Perform type inference flow analysis.
   void do_flow();
 
+  // Determine if bci is dominated by dom_bci
+  bool is_dominated_by(int bci, int dom_bci);
+
   void print_on(outputStream* st) const PRODUCT_RETURN;
 
   void rpo_print_on(outputStream* st) const PRODUCT_RETURN;

--- a/hotspot/src/share/vm/classfile/javaClasses.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/classfile/javaClasses.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -28,6 +28,7 @@
 #include "classfile/stringTable.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "code/debugInfo.hpp"
+#include "code/dependencyContext.hpp"
 #include "code/pcDesc.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/oopFactory.hpp"
@@ -3216,14 +3217,16 @@
   }
 }
 
-nmethodBucket* java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(oop call_site) {
+DependencyContext java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(oop call_site) {
   assert(java_lang_invoke_MethodHandleNatives_CallSiteContext::is_instance(call_site), "");
-  return (nmethodBucket*) (address) call_site->long_field(_vmdependencies_offset);
-}
-
-void java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(oop call_site, nmethodBucket* context) {
-  assert(java_lang_invoke_MethodHandleNatives_CallSiteContext::is_instance(call_site), "");
-  call_site->long_field_put(_vmdependencies_offset, (jlong) (address) context);
+  intptr_t* vmdeps_addr = (intptr_t*)call_site->address_field_addr(_vmdependencies_offset);
+#ifndef ASSERT
+  DependencyContext dep_ctx(vmdeps_addr);
+#else
+  // Verify that call_site isn't moved during DependencyContext lifetime.
+  DependencyContext dep_ctx(vmdeps_addr, Handle(call_site));
+#endif // ASSERT
+  return dep_ctx;
 }
 
 // Support for java_security_AccessControlContext

--- a/hotspot/src/share/vm/classfile/javaClasses.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/classfile/javaClasses.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1212,6 +1212,8 @@
 #define CALLSITECONTEXT_INJECTED_FIELDS(macro) \
   macro(java_lang_invoke_MethodHandleNatives_CallSiteContext, vmdependencies, intptr_signature, false)
 
+class DependencyContext;
+
 class java_lang_invoke_MethodHandleNatives_CallSiteContext : AllStatic {
   friend class JavaClasses;
 
@@ -1222,8 +1224,7 @@
 
 public:
   // Accessors
-  static nmethodBucket* vmdependencies(oop context);
-  static void       set_vmdependencies(oop context, nmethodBucket* bucket);
+  static DependencyContext vmdependencies(oop context);
 
   // Testers
   static bool is_subclass(Klass* klass) {

--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -109,6 +109,7 @@
   template(java_io_ByteArrayInputStream,              "java/io/ByteArrayInputStream")             \
   template(java_io_Serializable,                      "java/io/Serializable")                     \
   template(java_util_Arrays,                          "java/util/Arrays")                         \
+  template(java_util_Objects,                         "java/util/Objects")                         \
   template(java_util_Properties,                      "java/util/Properties")                     \
   template(java_util_Vector,                          "java/util/Vector")                         \
   template(java_util_AbstractList,                    "java/util/AbstractList")                   \
@@ -883,6 +884,9 @@
   do_intrinsic(_equalsL,                  java_lang_StringLatin1,equals_name, equalsB_signature,                 F_S)   \
   do_intrinsic(_equalsU,                  java_lang_StringUTF16, equals_name, equalsB_signature,                 F_S)   \
                                                                                                                         \
+  do_intrinsic(_Objects_checkIndex,       java_util_Objects,      checkIndex_name, Objects_checkIndex_signature, F_S)   \
+   do_signature(Objects_checkIndex_signature,                     "(IILjava/util/function/BiFunction;)I")               \
+                                                                                                                        \
   do_class(java_nio_Buffer,               "java/nio/Buffer")                                                            \
   do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
    do_name(     checkIndex_name,                                 "checkIndex")                                          \

--- a/hotspot/src/share/vm/code/codeCache.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/code/codeCache.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -133,18 +133,47 @@
 
 address CodeCache::_low_bound = 0;
 address CodeCache::_high_bound = 0;
-int CodeCache::_number_of_blobs = 0;
-int CodeCache::_number_of_adapters = 0;
-int CodeCache::_number_of_nmethods = 0;
 int CodeCache::_number_of_nmethods_with_dependencies = 0;
 bool CodeCache::_needs_cache_clean = false;
 nmethod* CodeCache::_scavenge_root_nmethods = NULL;
-int CodeCache::_codemem_full_count = 0;
 
 // Initialize array of CodeHeaps
 GrowableArray<CodeHeap*>* CodeCache::_heaps = new(ResourceObj::C_HEAP, mtCode) GrowableArray<CodeHeap*> (CodeBlobType::All, true);
 
+void CodeCache::check_heap_sizes(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, bool all_set) {
+  size_t total_size = non_nmethod_size + profiled_size + non_profiled_size;
+  // Prepare error message
+  const char* error = "Invalid code heap sizes";
+  err_msg message("NonNMethodCodeHeapSize (%zuK) + ProfiledCodeHeapSize (%zuK) + NonProfiledCodeHeapSize (%zuK) = %zuK",
+          non_nmethod_size/K, profiled_size/K, non_profiled_size/K, total_size/K);
+
+  if (total_size > cache_size) {
+    // Some code heap sizes were explicitly set: total_size must be <= cache_size
+    message.append(" is greater than ReservedCodeCacheSize (%zuK).", cache_size/K);
+    vm_exit_during_initialization(error, message);
+  } else if (all_set && total_size != cache_size) {
+    // All code heap sizes were explicitly set: total_size must equal cache_size
+    message.append(" is not equal to ReservedCodeCacheSize (%zuK).", cache_size/K);
+    vm_exit_during_initialization(error, message);
+  }
+}
+
 void CodeCache::initialize_heaps() {
+  bool non_nmethod_set      = FLAG_IS_CMDLINE(NonNMethodCodeHeapSize);
+  bool profiled_set         = FLAG_IS_CMDLINE(ProfiledCodeHeapSize);
+  bool non_profiled_set     = FLAG_IS_CMDLINE(NonProfiledCodeHeapSize);
+  size_t min_size           = os::vm_page_size();
+  size_t cache_size         = ReservedCodeCacheSize;
+  size_t non_nmethod_size   = NonNMethodCodeHeapSize;
+  size_t profiled_size      = ProfiledCodeHeapSize;
+  size_t non_profiled_size  = NonProfiledCodeHeapSize;
+  // Check if total size set via command line flags exceeds the reserved size
+  check_heap_sizes((non_nmethod_set  ? non_nmethod_size  : min_size),
+                   (profiled_set     ? profiled_size     : min_size),
+                   (non_profiled_set ? non_profiled_size : min_size),
+                   cache_size,
+                   non_nmethod_set && profiled_set && non_profiled_set);
+
   // Determine size of compiler buffers
   size_t code_buffers_size = 0;
 #ifdef COMPILER1
@@ -159,51 +188,94 @@
   code_buffers_size += c2_count * C2Compiler::initial_code_buffer_size();
 #endif
 
+  // Increase default non_nmethod_size to account for compiler buffers
+  if (!non_nmethod_set) {
+    non_nmethod_size += code_buffers_size;
+  }
   // Calculate default CodeHeap sizes if not set by user
-  if (!FLAG_IS_CMDLINE(NonNMethodCodeHeapSize) && !FLAG_IS_CMDLINE(ProfiledCodeHeapSize)
-      && !FLAG_IS_CMDLINE(NonProfiledCodeHeapSize)) {
-    // Increase default NonNMethodCodeHeapSize to account for compiler buffers
-    FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, NonNMethodCodeHeapSize + code_buffers_size);
-
+  if (!non_nmethod_set && !profiled_set && !non_profiled_set) {
     // Check if we have enough space for the non-nmethod code heap
-    if (ReservedCodeCacheSize > NonNMethodCodeHeapSize) {
-      // Use the default value for NonNMethodCodeHeapSize and one half of the
-      // remaining size for non-profiled methods and one half for profiled methods
-      size_t remaining_size = ReservedCodeCacheSize - NonNMethodCodeHeapSize;
-      size_t profiled_size = remaining_size / 2;
-      size_t non_profiled_size = remaining_size - profiled_size;
-      FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, profiled_size);
-      FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, non_profiled_size);
+    if (cache_size > non_nmethod_size) {
+      // Use the default value for non_nmethod_size and one half of the
+      // remaining size for non-profiled and one half for profiled methods
+      size_t remaining_size = cache_size - non_nmethod_size;
+      profiled_size = remaining_size / 2;
+      non_profiled_size = remaining_size - profiled_size;
     } else {
       // Use all space for the non-nmethod heap and set other heaps to minimal size
-      FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, ReservedCodeCacheSize - os::vm_page_size() * 2);
-      FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, os::vm_page_size());
-      FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, os::vm_page_size());
+      non_nmethod_size = cache_size - 2 * min_size;
+      profiled_size = min_size;
+      non_profiled_size = min_size;
+    }
+  } else if (!non_nmethod_set || !profiled_set || !non_profiled_set) {
+    // The user explicitly set some code heap sizes. Increase or decrease the (default)
+    // sizes of the other code heaps accordingly. First adapt non-profiled and profiled
+    // code heap sizes and then only change non-nmethod code heap size if still necessary.
+    intx diff_size = cache_size - (non_nmethod_size + profiled_size + non_profiled_size);
+    if (non_profiled_set) {
+      if (!profiled_set) {
+        // Adapt size of profiled code heap
+        if (diff_size < 0 && ((intx)profiled_size + diff_size) <= 0) {
+          // Not enough space available, set to minimum size
+          diff_size += profiled_size - min_size;
+          profiled_size = min_size;
+        } else {
+          profiled_size += diff_size;
+          diff_size = 0;
+        }
+      }
+    } else if (profiled_set) {
+      // Adapt size of non-profiled code heap
+      if (diff_size < 0 && ((intx)non_profiled_size + diff_size) <= 0) {
+        // Not enough space available, set to minimum size
+        diff_size += non_profiled_size - min_size;
+        non_profiled_size = min_size;
+      } else {
+        non_profiled_size += diff_size;
+        diff_size = 0;
+      }
+    } else if (non_nmethod_set) {
+      // Distribute remaining size between profiled and non-profiled code heaps
+      diff_size = cache_size - non_nmethod_size;
+      profiled_size = diff_size / 2;
+      non_profiled_size = diff_size - profiled_size;
+      diff_size = 0;
+    }
+    if (diff_size != 0) {
+      // Use non-nmethod code heap for remaining space requirements
+      assert(!non_nmethod_set && ((intx)non_nmethod_size + diff_size) > 0, "sanity");
+      non_nmethod_size += diff_size;
     }
   }
 
   // We do not need the profiled CodeHeap, use all space for the non-profiled CodeHeap
   if(!heap_available(CodeBlobType::MethodProfiled)) {
-    FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, NonProfiledCodeHeapSize + ProfiledCodeHeapSize);
-    FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, 0);
+    non_profiled_size += profiled_size;
+    profiled_size = 0;
   }
   // We do not need the non-profiled CodeHeap, use all space for the non-nmethod CodeHeap
   if(!heap_available(CodeBlobType::MethodNonProfiled)) {
-    FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, NonNMethodCodeHeapSize + NonProfiledCodeHeapSize);
-    FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, 0);
+    non_nmethod_size += non_profiled_size;
+    non_profiled_size = 0;
+  }
+  // Make sure we have enough space for VM internal code
+  uint min_code_cache_size = CodeCacheMinimumUseSpace DEBUG_ONLY(* 3);
+  if (non_nmethod_size < (min_code_cache_size + code_buffers_size)) {
+    vm_exit_during_initialization(err_msg(
+        "Not enough space in non-nmethod code heap to run VM: %zuK < %zuK",
+        non_nmethod_size/K, (min_code_cache_size + code_buffers_size)/K));
   }
 
-  // Make sure we have enough space for VM internal code
-  uint min_code_cache_size = CodeCacheMinimumUseSpace DEBUG_ONLY(* 3);
-  if (NonNMethodCodeHeapSize < (min_code_cache_size + code_buffers_size)) {
-    vm_exit_during_initialization("Not enough space in non-nmethod code heap to run VM.");
-  }
-  guarantee(NonProfiledCodeHeapSize + ProfiledCodeHeapSize + NonNMethodCodeHeapSize <= ReservedCodeCacheSize, "Size check");
+  // Verify sizes and update flag values
+  assert(non_profiled_size + profiled_size + non_nmethod_size == cache_size, "Invalid code heap sizes");
+  FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, non_nmethod_size);
+  FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, profiled_size);
+  FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, non_profiled_size);
 
   // Align CodeHeaps
   size_t alignment = heap_alignment();
-  size_t non_method_size = align_size_up(NonNMethodCodeHeapSize, alignment);
-  size_t profiled_size   = align_size_down(ProfiledCodeHeapSize, alignment);
+  non_nmethod_size = align_size_up(non_nmethod_size, alignment);
+  profiled_size   = align_size_down(profiled_size, alignment);
 
   // Reserve one continuous chunk of memory for CodeHeaps and split it into
   // parts for the individual heaps. The memory layout looks like this:
@@ -212,9 +284,9 @@
   //      Profiled nmethods
   //         Non-nmethods
   // ---------- low ------------
-  ReservedCodeSpace rs = reserve_heap_memory(ReservedCodeCacheSize);
-  ReservedSpace non_method_space    = rs.first_part(non_method_size);
-  ReservedSpace rest                = rs.last_part(non_method_size);
+  ReservedCodeSpace rs = reserve_heap_memory(cache_size);
+  ReservedSpace non_method_space    = rs.first_part(non_nmethod_size);
+  ReservedSpace rest                = rs.last_part(non_nmethod_size);
   ReservedSpace profiled_space      = rest.first_part(profiled_size);
   ReservedSpace non_profiled_space  = rest.last_part(profiled_size);
 
@@ -420,42 +492,41 @@
     }
   }
   print_trace("allocation", cb, size);
-  _number_of_blobs++;
   return cb;
 }
 
 void CodeCache::free(CodeBlob* cb) {
   assert_locked_or_safepoint(CodeCache_lock);
-
+  CodeHeap* heap = get_code_heap(cb);
   print_trace("free", cb);
   if (cb->is_nmethod()) {
-    _number_of_nmethods--;
+    heap->set_nmethod_count(heap->nmethod_count() - 1);
     if (((nmethod *)cb)->has_dependencies()) {
       _number_of_nmethods_with_dependencies--;
     }
   }
   if (cb->is_adapter_blob()) {
-    _number_of_adapters--;
+    heap->set_adapter_count(heap->adapter_count() - 1);
   }
-  _number_of_blobs--;
 
   // Get heap for given CodeBlob and deallocate
   get_code_heap(cb)->deallocate(cb);
 
-  assert(_number_of_blobs >= 0, "sanity check");
+  assert(heap->blob_count() >= 0, "sanity check");
 }
 
 void CodeCache::commit(CodeBlob* cb) {
   // this is called by nmethod::nmethod, which must already own CodeCache_lock
   assert_locked_or_safepoint(CodeCache_lock);
+  CodeHeap* heap = get_code_heap(cb);
   if (cb->is_nmethod()) {
-    _number_of_nmethods++;
+    heap->set_nmethod_count(heap->nmethod_count() + 1);
     if (((nmethod *)cb)->has_dependencies()) {
       _number_of_nmethods_with_dependencies++;
     }
   }
   if (cb->is_adapter_blob()) {
-    _number_of_adapters++;
+    heap->set_adapter_count(heap->adapter_count() + 1);
   }
 
   // flush the hardware I-cache
@@ -577,11 +648,9 @@
     assert(cur->on_scavenge_root_list(), "else shouldn't be on this list");
 
     bool is_live = (!cur->is_zombie() && !cur->is_unloaded());
-#ifndef PRODUCT
     if (TraceScavenge) {
       cur->print_on(tty, is_live ? "scavenge root" : "dead scavenge root"); tty->cr();
     }
-#endif //PRODUCT
     if (is_live) {
       // Perform cur->oops_do(f), maybe just once per nmethod.
       f->do_code_blob(cur);
@@ -774,6 +843,55 @@
   }
 }
 
+int CodeCache::blob_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->blob_count() : 0;
+}
+
+int CodeCache::blob_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->blob_count();
+  }
+  return count;
+}
+
+int CodeCache::nmethod_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->nmethod_count() : 0;
+}
+
+int CodeCache::nmethod_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->nmethod_count();
+  }
+  return count;
+}
+
+int CodeCache::adapter_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->adapter_count() : 0;
+}
+
+int CodeCache::adapter_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->adapter_count();
+  }
+  return count;
+}
+
+address CodeCache::low_bound(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? (address)heap->low_boundary() : NULL;
+}
+
+address CodeCache::high_bound(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? (address)heap->high_boundary() : NULL;
+}
+
 size_t CodeCache::capacity() {
   size_t cap = 0;
   FOR_ALL_HEAPS(heap) {
@@ -863,6 +981,9 @@
     initialize_heaps();
   } else {
     // Use a single code heap
+    FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, 0);
+    FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, 0);
+    FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, 0);
     ReservedCodeSpace rs = reserve_heap_memory(ReservedCodeCacheSize);
     add_heap(rs, "CodeCache", CodeBlobType::All);
   }
@@ -1104,9 +1225,8 @@
   CodeHeap* heap = get_code_heap(code_blob_type);
   assert(heap != NULL, "heap is null");
 
-  if (!heap->was_full() || print) {
+  if ((heap->full_count() == 0) || print) {
     // Not yet reported for this heap, report
-    heap->report_full();
     if (SegmentedCodeCache) {
       warning("%s is full. Compiler has been disabled.", get_code_heap_name(code_blob_type));
       warning("Try increasing the code heap size using -XX:%s=", get_code_heap_flag_name(code_blob_type));
@@ -1125,18 +1245,19 @@
     tty->print("%s", s.as_string());
   }
 
-  _codemem_full_count++;
+  heap->report_full();
+
   EventCodeCacheFull event;
   if (event.should_commit()) {
     event.set_codeBlobType((u1)code_blob_type);
     event.set_startAddress((u8)heap->low_boundary());
     event.set_commitedTopAddress((u8)heap->high());
     event.set_reservedTopAddress((u8)heap->high_boundary());
-    event.set_entryCount(nof_blobs());
-    event.set_methodCount(nof_nmethods());
-    event.set_adaptorCount(nof_adapters());
+    event.set_entryCount(heap->blob_count());
+    event.set_methodCount(heap->nmethod_count());
+    event.set_adaptorCount(heap->adapter_count());
     event.set_unallocatedCapacity(heap->unallocated_capacity()/K);
-    event.set_fullCount(_codemem_full_count);
+    event.set_fullCount(heap->full_count());
     event.commit();
   }
 }
@@ -1360,7 +1481,7 @@
   if (detailed) {
     st->print_cr(" total_blobs=" UINT32_FORMAT " nmethods=" UINT32_FORMAT
                        " adapters=" UINT32_FORMAT,
-                       nof_blobs(), nof_nmethods(), nof_adapters());
+                       blob_count(), nmethod_count(), adapter_count());
     st->print_cr(" compilation: %s", CompileBroker::should_compile_new_jobs() ?
                  "enabled" : Arguments::mode() == Arguments::_int ?
                  "disabled (interpreter mode)" :
@@ -1392,6 +1513,6 @@
 void CodeCache::log_state(outputStream* st) {
   st->print(" total_blobs='" UINT32_FORMAT "' nmethods='" UINT32_FORMAT "'"
             " adapters='" UINT32_FORMAT "' free_code_cache='" SIZE_FORMAT "'",
-            nof_blobs(), nof_nmethods(), nof_adapters(),
+            blob_count(), nmethod_count(), adapter_count(),
             unallocated_capacity());
 }

--- a/hotspot/src/share/vm/code/codeCache.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/code/codeCache.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -85,26 +85,23 @@
 
   static address _low_bound;                            // Lower bound of CodeHeap addresses
   static address _high_bound;                           // Upper bound of CodeHeap addresses
-  static int _number_of_blobs;                          // Total number of CodeBlobs in the cache
-  static int _number_of_adapters;                       // Total number of Adapters in the cache
-  static int _number_of_nmethods;                       // Total number of nmethods in the cache
   static int _number_of_nmethods_with_dependencies;     // Total number of nmethods with dependencies
   static bool _needs_cache_clean;                       // True if inline caches of the nmethods needs to be flushed
   static nmethod* _scavenge_root_nmethods;              // linked via nm->scavenge_root_link()
-  static int _codemem_full_count;                       // Number of times a CodeHeap in the cache was full
 
   static void mark_scavenge_root_nmethods() PRODUCT_RETURN;
   static void verify_perm_nmethods(CodeBlobClosure* f_or_null) PRODUCT_RETURN;
 
   // CodeHeap management
   static void initialize_heaps();                             // Initializes the CodeHeaps
+  // Check the code heap sizes set by the user via command line
+  static void check_heap_sizes(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, bool all_set);
   // Creates a new heap with the given name and size, containing CodeBlobs of the given type
   static void add_heap(ReservedSpace rs, const char* name, int code_blob_type);
   static CodeHeap* get_code_heap(const CodeBlob* cb);         // Returns the CodeHeap for the given CodeBlob
   static CodeHeap* get_code_heap(int code_blob_type);         // Returns the CodeHeap for the given CodeBlobType
   // Returns the name of the VM option to set the size of the corresponding CodeHeap
   static const char* get_code_heap_flag_name(int code_blob_type);
-  static bool heap_available(int code_blob_type);             // Returns true if an own CodeHeap for the given CodeBlobType is available
   static size_t heap_alignment();                             // Returns the alignment of the CodeHeaps in bytes
   static ReservedCodeSpace reserve_heap_memory(size_t size);  // Reserves one continuous chunk of memory for the CodeHeaps
 
@@ -139,9 +136,12 @@
   static CodeBlob* find_blob_unsafe(void* start);       // Same as find_blob but does not fail if looking up a zombie method
   static nmethod*  find_nmethod(void* start);           // Returns the nmethod containing the given address
 
-  static int       nof_blobs()      { return _number_of_blobs; }      // Returns the total number of CodeBlobs in the cache
-  static int       nof_adapters()   { return _number_of_adapters; }   // Returns the total number of Adapters in the cache
-  static int       nof_nmethods()   { return _number_of_nmethods; }   // Returns the total number of nmethods in the cache
+  static int       blob_count();                        // Returns the total number of CodeBlobs in the cache
+  static int       blob_count(int code_blob_type);
+  static int       adapter_count();                     // Returns the total number of Adapters in the cache
+  static int       adapter_count(int code_blob_type);
+  static int       nmethod_count();                     // Returns the total number of nmethods in the cache
+  static int       nmethod_count(int code_blob_type);
 
   // GC support
   static void gc_epilogue();
@@ -177,7 +177,9 @@
 
   // The full limits of the codeCache
   static address low_bound()                          { return _low_bound; }
+  static address low_bound(int code_blob_type);
   static address high_bound()                         { return _high_bound; }
+  static address high_bound(int code_blob_type);
 
   // Profiling
   static size_t capacity();
@@ -191,6 +193,9 @@
   static void set_needs_cache_clean(bool v)           { _needs_cache_clean = v;    }
   static void clear_inline_caches();                  // clear all inline caches
 
+  // Returns true if an own CodeHeap for the given CodeBlobType is available
+  static bool heap_available(int code_blob_type);
+
   // Returns the CodeBlobType for the given nmethod
   static int get_code_blob_type(nmethod* nm) {
     return get_code_heap(nm)->code_blob_type();
@@ -239,7 +244,10 @@
   // tells how many nmethods have dependencies
   static int number_of_nmethods_with_dependencies();
 
-  static int get_codemem_full_count() { return _codemem_full_count; }
+  static int get_codemem_full_count(int code_blob_type) {
+    CodeHeap* heap = get_code_heap(code_blob_type);
+    return (heap != NULL) ? heap->full_count() : 0;
+  }
 };

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/code/dependencyContext.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/nmethod.hpp"
+#include "code/dependencies.hpp"
+#include "code/dependencyContext.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/perfData.hpp"
+#include "utilities/exceptions.hpp"
+
+PerfCounter* DependencyContext::_perf_total_buckets_allocated_count   = NULL;
+PerfCounter* DependencyContext::_perf_total_buckets_deallocated_count = NULL;
+PerfCounter* DependencyContext::_perf_total_buckets_stale_count       = NULL;
+PerfCounter* DependencyContext::_perf_total_buckets_stale_acc_count   = NULL;
+
+void dependencyContext_init() {
+  DependencyContext::init();
+}
+
+void DependencyContext::init() {
+  if (UsePerfData) {
+    EXCEPTION_MARK;
+    _perf_total_buckets_allocated_count =
+        PerfDataManager::create_counter(SUN_CI, "nmethodBucketsAllocated", PerfData::U_Events, CHECK);
+    _perf_total_buckets_deallocated_count =
+        PerfDataManager::create_counter(SUN_CI, "nmethodBucketsDeallocated", PerfData::U_Events, CHECK);
+    _perf_total_buckets_stale_count =
+        PerfDataManager::create_counter(SUN_CI, "nmethodBucketsStale", PerfData::U_Events, CHECK);
+    _perf_total_buckets_stale_acc_count =
+        PerfDataManager::create_counter(SUN_CI, "nmethodBucketsStaleAccumulated", PerfData::U_Events, CHECK);
+  }
+}
+
+//
+// Walk the list of dependent nmethods searching for nmethods which
+// are dependent on the changes that were passed in and mark them for
+// deoptimization.  Returns the number of nmethods found.
+//
+int DependencyContext::mark_dependent_nmethods(DepChange& changes) {
+  int found = 0;
+  for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) {
+    nmethod* nm = b->get_nmethod();
+    // since dependencies aren't removed until an nmethod becomes a zombie,
+    // the dependency list may contain nmethods which aren't alive.
+    if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
+      if (TraceDependencies) {
+        ResourceMark rm;
+        tty->print_cr("Marked for deoptimization");
+        changes.print();
+        nm->print();
+        nm->print_dependencies();
+      }
+      nm->mark_for_deoptimization();
+      found++;
+    }
+  }
+  return found;
+}
+
+//
+// Add an nmethod to the dependency context.
+// It's possible that an nmethod has multiple dependencies on a klass
+// so a count is kept for each bucket to guarantee that creation and
+// deletion of dependencies is consistent.
+//
+void DependencyContext::add_dependent_nmethod(nmethod* nm, bool expunge) {
+  assert_lock_strong(CodeCache_lock);
+  for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) {
+    if (nm == b->get_nmethod()) {
+      b->increment();
+      return;
+    }
+  }
+  set_dependencies(new nmethodBucket(nm, dependencies()));
+  if (UsePerfData) {
+    _perf_total_buckets_allocated_count->inc();
+  }
+  if (expunge) {
+    // Remove stale entries from the list.
+    expunge_stale_entries();
+  }
+}
+
+//
+// Remove an nmethod dependency from the context.
+// Decrement count of the nmethod in the dependency list and, optionally, remove
+// the bucket completely when the count goes to 0.  This method must find
+// a corresponding bucket otherwise there's a bug in the recording of dependencies.
+// Can be called concurrently by parallel GC threads.
+//
+void DependencyContext::remove_dependent_nmethod(nmethod* nm, bool expunge) {
+  assert_locked_or_safepoint(CodeCache_lock);
+  nmethodBucket* first = dependencies();
+  nmethodBucket* last = NULL;
+  for (nmethodBucket* b = first; b != NULL; b = b->next()) {
+    if (nm == b->get_nmethod()) {
+      int val = b->decrement();
+      guarantee(val >= 0, "Underflow: %d", val);
+      if (val == 0) {
+        if (expunge) {
+          if (last == NULL) {
+            set_dependencies(b->next());
+          } else {
+            last->set_next(b->next());
+          }
+          delete b;
+          if (UsePerfData) {
+            _perf_total_buckets_deallocated_count->inc();
+          }
+        } else {
+          // Mark the context as having stale entries, since it is not safe to
+          // expunge the list right now.
+          set_has_stale_entries(true);
+          if (UsePerfData) {
+            _perf_total_buckets_stale_count->inc();
+            _perf_total_buckets_stale_acc_count->inc();
+          }
+        }
+      }
+      if (expunge) {
+        // Remove stale entries from the list.
+        expunge_stale_entries();
+      }
+      return;
+    }
+    last = b;
+  }
+#ifdef ASSERT
+  tty->print_raw_cr("### can't find dependent nmethod");
+  nm->print();
+#endif // ASSERT
+  ShouldNotReachHere();
+}
+
+//
+// Reclaim all unused buckets.
+//
+void DependencyContext::expunge_stale_entries() {
+  assert_locked_or_safepoint(CodeCache_lock);
+  if (!has_stale_entries()) {
+    assert(!find_stale_entries(), "inconsistent info");
+    return;
+  }
+  nmethodBucket* first = dependencies();
+  nmethodBucket* last = NULL;
+  int removed = 0;
+  for (nmethodBucket* b = first; b != NULL;) {
+    assert(b->count() >= 0, "bucket count: %d", b->count());
+    nmethodBucket* next = b->next();
+    if (b->count() == 0) {
+      if (last == NULL) {
+        first = next;
+      } else {
+        last->set_next(next);
+      }
+      removed++;
+      delete b;
+      // last stays the same.
+    } else {
+      last = b;
+    }
+    b = next;
+  }
+  set_dependencies(first);
+  set_has_stale_entries(false);
+  if (UsePerfData && removed > 0) {
+    _perf_total_buckets_deallocated_count->inc(removed);
+    _perf_total_buckets_stale_count->dec(removed);
+  }
+}
+
+//
+// Invalidate all dependencies in the context
+int DependencyContext::remove_all_dependents() {
+  assert_locked_or_safepoint(CodeCache_lock);
+  nmethodBucket* b = dependencies();
+  set_dependencies(NULL);
+  int marked = 0;
+  int removed = 0;
+  while (b != NULL) {
+    nmethod* nm = b->get_nmethod();
+    if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization()) {
+      nm->mark_for_deoptimization();
+      marked++;
+    }
+    nmethodBucket* next = b->next();
+    removed++;
+    delete b;
+    b = next;
+  }
+  set_has_stale_entries(false);
+  if (UsePerfData && removed > 0) {
+    _perf_total_buckets_deallocated_count->inc(removed);
+  }
+  return marked;
+}
+
+#ifndef PRODUCT
+void DependencyContext::print_dependent_nmethods(bool verbose) {
+  int idx = 0;
+  for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) {
+    nmethod* nm = b->get_nmethod();
+    tty->print("[%d] count=%d { ", idx++, b->count());
+    if (!verbose) {
+      nm->print_on(tty, "nmethod");
+      tty->print_cr(" } ");
+    } else {
+      nm->print();
+      nm->print_dependencies();
+      tty->print_cr("--- } ");
+    }
+  }
+}
+
+bool DependencyContext::is_dependent_nmethod(nmethod* nm) {
+  for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) {
+    if (nm == b->get_nmethod()) {
+#ifdef ASSERT
+      int count = b->count();
+      assert(count >= 0, "count shouldn't be negative: %d", count);
+#endif
+      return true;
+    }
+  }
+  return false;
+}
+
+bool DependencyContext::find_stale_entries() {
+  for (nmethodBucket* b = dependencies(); b != NULL; b = b->next()) {
+    if (b->count() == 0)  return true;
+  }
+  return false;
+}
+
+#endif //PRODUCT
+
+int nmethodBucket::decrement() {
+  return Atomic::add(-1, (volatile int *)&_count);
+}
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestDependencyContext {
+ public:
+  nmethod* _nmethods[3];
+
+  intptr_t _dependency_context;
+
+  TestDependencyContext() : _dependency_context(DependencyContext::EMPTY) {
+    CodeCache_lock->lock_without_safepoint_check();
+
+    DependencyContext depContext(&_dependency_context);
+
+    _nmethods[0] = reinterpret_cast<nmethod*>(0x8 * 0);
+    _nmethods[1] = reinterpret_cast<nmethod*>(0x8 * 1);
+    _nmethods[2] = reinterpret_cast<nmethod*>(0x8 * 2);
+
+    depContext.add_dependent_nmethod(_nmethods[2]);
+    depContext.add_dependent_nmethod(_nmethods[1]);
+    depContext.add_dependent_nmethod(_nmethods[0]);
+  }
+
+  ~TestDependencyContext() {
+    wipe();
+    CodeCache_lock->unlock();
+  }
+
+  static void testRemoveDependentNmethod(int id, bool delete_immediately) {
+    TestDependencyContext c;
+    DependencyContext depContext(&c._dependency_context);
+    assert(!has_stale_entries(depContext), "check");
+
+    nmethod* nm = c._nmethods[id];
+    depContext.remove_dependent_nmethod(nm, delete_immediately);
+
+    if (!delete_immediately) {
+      assert(has_stale_entries(depContext), "check");
+      assert(depContext.is_dependent_nmethod(nm), "check");
+      depContext.expunge_stale_entries();
+    }
+
+    assert(!has_stale_entries(depContext), "check");
+    assert(!depContext.is_dependent_nmethod(nm), "check");
+  }
+
+  static void testRemoveDependentNmethod() {
+    testRemoveDependentNmethod(0, false);
+    testRemoveDependentNmethod(1, false);
+    testRemoveDependentNmethod(2, false);
+
+    testRemoveDependentNmethod(0, true);
+    testRemoveDependentNmethod(1, true);
+    testRemoveDependentNmethod(2, true);
+  }
+
+  static void test() {
+    testRemoveDependentNmethod();
+  }
+
+  static bool has_stale_entries(DependencyContext ctx) {
+    assert(ctx.has_stale_entries() == ctx.find_stale_entries(), "check");
+    return ctx.has_stale_entries();
+  }
+
+  void wipe() {
+    DependencyContext ctx(&_dependency_context);
+    nmethodBucket* b = ctx.dependencies();
+    ctx.set_dependencies(NULL);
+    ctx.set_has_stale_entries(false);
+    while (b != NULL) {
+      nmethodBucket* next = b->next();
+      delete b;
+      b = next;
+    }
+  }
+};
+
+void TestDependencyContext_test() {
+  TestDependencyContext::test();
+}
+
+#endif // PRODUCT

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/code/dependencyContext.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP
+#define SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/perfData.hpp"
+
+class nmethod;
+class DepChange;
+
+//
+// nmethodBucket is used to record dependent nmethods for
+// deoptimization.  nmethod dependencies are actually <klass, method>
+// pairs but we really only care about the klass part for purposes of
+// finding nmethods which might need to be deoptimized.  Instead of
+// recording the method, a count of how many times a particular nmethod
+// was recorded is kept.  This ensures that any recording errors are
+// noticed since an nmethod should be removed as many times are it's
+// added.
+//
+class nmethodBucket: public CHeapObj<mtClass> {
+  friend class VMStructs;
+ private:
+  nmethod*       _nmethod;
+  int            _count;
+  nmethodBucket* _next;
+
+ public:
+  nmethodBucket(nmethod* nmethod, nmethodBucket* next) :
+   _nmethod(nmethod), _next(next), _count(1) {}
+
+  int count()                             { return _count; }
+  int increment()                         { _count += 1; return _count; }
+  int decrement();
+  nmethodBucket* next()                   { return _next; }
+  void set_next(nmethodBucket* b)         { _next = b; }
+  nmethod* get_nmethod()                  { return _nmethod; }
+};
+
+//
+// Utility class to manipulate nmethod dependency context.
+// The context consists of nmethodBucket* (a head of a linked list)
+// and a boolean flag (does the list contains stale entries). The structure is
+// encoded as an intptr_t: lower bit is used for the flag. It is possible since
+// nmethodBucket* is aligned - the structure is malloc'ed in C heap.
+// Dependency context can be attached either to an InstanceKlass (_dep_context field)
+// or CallSiteContext oop for call_site_target dependencies (see javaClasses.hpp).
+// DependencyContext class operates on some location which holds a intptr_t value.
+//
+class DependencyContext : public StackObj {
+  friend class VMStructs;
+  friend class TestDependencyContext;
+ private:
+  enum TagBits { _has_stale_entries_bit = 1, _has_stale_entries_mask = 1 };
+
+  intptr_t* _dependency_context_addr;
+
+  void set_dependencies(nmethodBucket* b) {
+    assert((intptr_t(b) & _has_stale_entries_mask) == 0, "should be aligned");
+    if (has_stale_entries()) {
+      *_dependency_context_addr = intptr_t(b) | _has_stale_entries_mask;
+    } else {
+      *_dependency_context_addr = intptr_t(b);
+    }
+  }
+
+  void set_has_stale_entries(bool x) {
+    if (x) {
+      *_dependency_context_addr |= _has_stale_entries_mask;
+    } else {
+      *_dependency_context_addr &= ~_has_stale_entries_mask;
+    }
+  }
+
+  nmethodBucket* dependencies() {
+    intptr_t value = *_dependency_context_addr;
+    return (nmethodBucket*) (value & ~_has_stale_entries_mask);
+  }
+
+  bool has_stale_entries() const {
+    intptr_t value = *_dependency_context_addr;
+    return (value & _has_stale_entries_mask) != 0;
+  }
+
+  static PerfCounter* _perf_total_buckets_allocated_count;
+  static PerfCounter* _perf_total_buckets_deallocated_count;
+  static PerfCounter* _perf_total_buckets_stale_count;
+  static PerfCounter* _perf_total_buckets_stale_acc_count;
+
+ public:
+#ifdef ASSERT
+  // Verification for dependency contexts rooted at Java objects.
+  Handle _base; // non-NULL if dependency context resides in an oop (e.g. CallSite).
+  oop _base_oop;
+
+  DependencyContext(intptr_t* addr, Handle base = Handle())
+    : _dependency_context_addr(addr), _base(base)
+  {
+      _base_oop = _base();
+  }
+
+  ~DependencyContext() {
+    // Base oop relocation invalidates _dependency_context_addr.
+    assert(_base_oop == _base(), "base oop relocation is forbidden");
+  }
+#else
+    DependencyContext(intptr_t* addr) : _dependency_context_addr(addr) {}
+#endif // ASSERT
+
+  static const intptr_t EMPTY = 0; // dependencies = NULL, has_stale_entries = false
+
+  static void init();
+
+  int  mark_dependent_nmethods(DepChange& changes);
+  void add_dependent_nmethod(nmethod* nm, bool expunge_stale_entries = false);
+  void remove_dependent_nmethod(nmethod* nm, bool expunge_stale_entries = false);
+  int  remove_all_dependents();
+
+  void expunge_stale_entries();
+
+#ifndef PRODUCT
+  void print_dependent_nmethods(bool verbose);
+  bool is_dependent_nmethod(nmethod* nm);
+  bool find_stale_entries();
+#endif //PRODUCT
+};
+#endif // SHARE_VM_CODE_DEPENDENCYCONTEXT_HPP

--- a/hotspot/src/share/vm/code/nmethod.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1539,7 +1539,7 @@
   if (PrintMethodFlushing) {
     tty->print_cr("*flushing nmethod %3d/" INTPTR_FORMAT ". Live blobs:" UINT32_FORMAT
                   "/Free CodeCache:" SIZE_FORMAT "Kb",
-                  _compile_id, p2i(this), CodeCache::nof_blobs(),
+                  _compile_id, p2i(this), CodeCache::blob_count(),
                   CodeCache::unallocated_capacity(CodeCache::get_code_blob_type(this))/1024);
   }
 
@@ -1819,9 +1819,7 @@
   if (_jvmci_installed_code != NULL) {
     if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
       if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
-        _jvmci_installed_code = NULL;
-        bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+        clear_jvmci_installed_code();
       }
     } else {
       if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
@@ -1922,27 +1920,6 @@
     unloading_occurred = true;
   }
 
-#if INCLUDE_JVMCI
-  // Follow JVMCI method
-  if (_jvmci_installed_code != NULL) {
-    if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
-      if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        _jvmci_installed_code = NULL;
-      }
-    } else {
-      if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
-        return false;
-      }
-    }
-  }
-
-  if (_speculation_log != NULL) {
-    if (!is_alive->do_object_b(_speculation_log)) {
-      _speculation_log = NULL;
-    }
-  }
-#endif
-
   // Exception cache
   clean_exception_cache(is_alive);
 
@@ -2006,9 +1983,7 @@
   if (_jvmci_installed_code != NULL) {
     if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
       if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
-        _jvmci_installed_code = NULL;
-        bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+        clear_jvmci_installed_code();
       }
     } else {
       if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
@@ -2271,7 +2246,7 @@
           break;
       }
       // Mark was clear when we first saw this guy.
-      NOT_PRODUCT(if (TraceScavenge)  print_on(tty, "oops_do, mark"));
+      if (TraceScavenge) { print_on(tty, "oops_do, mark"); }
       return false;
     }
   }
@@ -2280,7 +2255,7 @@
 }
 
 void nmethod::oops_do_marking_prologue() {
-  NOT_PRODUCT(if (TraceScavenge)  tty->print_cr("[oops_do_marking_prologue"));
+  if (TraceScavenge) { tty->print_cr("[oops_do_marking_prologue"); }
   assert(_oops_do_mark_nmethods == NULL, "must not call oops_do_marking_prologue twice in a row");
   // We use cmpxchg_ptr instead of regular assignment here because the user
   // may fork a bunch of threads, and we need them all to see the same state.
@@ -2302,7 +2277,7 @@
   void* required = _oops_do_mark_nmethods;
   void* observed = Atomic::cmpxchg_ptr(NULL, &_oops_do_mark_nmethods, required);
   guarantee(observed == required, "no races in this sequential code");
-  NOT_PRODUCT(if (TraceScavenge)  tty->print_cr("oops_do_marking_epilogue]"));
+  if (TraceScavenge) { tty->print_cr("oops_do_marking_epilogue]"); }
 }
 
 class DetectScavengeRoot: public OopClosure {
@@ -3373,6 +3348,14 @@
 #endif // !PRODUCT
 
 #if INCLUDE_JVMCI
+void nmethod::clear_jvmci_installed_code() {
+  // This must be done carefully to maintain nmethod remembered sets properly
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
+  _jvmci_installed_code = NULL;
+  bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+}
+
 void nmethod::maybe_invalidate_installed_code() {
   if (_jvmci_installed_code != NULL) {
      if (!is_alive()) {
@@ -3382,7 +3365,7 @@
        // might want to invalidate all existing activations.
        InstalledCode::set_address(_jvmci_installed_code, 0);
        InstalledCode::set_entryPoint(_jvmci_installed_code, 0);
-       _jvmci_installed_code = NULL;
+       clear_jvmci_installed_code();
      } else if (is_not_entrant()) {
        InstalledCode::set_entryPoint(_jvmci_installed_code, 0);
      }

--- a/hotspot/src/share/vm/code/nmethod.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -602,7 +602,7 @@
 #if INCLUDE_JVMCI
   oop jvmci_installed_code() { return _jvmci_installed_code ; }
   char* jvmci_installed_code_name(char* buf, size_t buflen);
-  void set_jvmci_installed_code(oop installed_code) { _jvmci_installed_code = installed_code;  }
+  void clear_jvmci_installed_code();
   void maybe_invalidate_installed_code();
   oop speculation_log() { return _speculation_log ; }
   void set_speculation_log(oop speculation_log) { _speculation_log = speculation_log;  }

--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -26,6 +26,7 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "code/codeCache.hpp"
+#include "code/dependencyContext.hpp"
 #include "compiler/compileBroker.hpp"
 #include "compiler/compileLog.hpp"
 #include "compiler/compilerOracle.hpp"
@@ -237,10 +238,27 @@
   task->set_code_handle(NULL);
   thread->set_env(NULL);
   if (task->is_blocking()) {
-    MutexLocker notifier(task->lock(), thread);
-    task->mark_complete();
-    // Notify the waiting thread that the compilation has completed.
-    task->lock()->notify_all();
+    bool free_task = false;
+    {
+      MutexLocker notifier(task->lock(), thread);
+      task->mark_complete();
+#if INCLUDE_JVMCI
+      if (CompileBroker::compiler(task->comp_level())->is_jvmci() &&
+        !task->has_waiter()) {
+        // The waiting thread timed out and thus did not free the task.
+        free_task = true;
+      }
+#endif
+      if (!free_task) {
+        // Notify the waiting thread that the compilation has completed
+        // so that it can free the task.
+        task->lock()->notify_all();
+      }
+    }
+    if (free_task) {
+      // The task can only be freed once the task lock is released.
+      CompileTask::free(task);
+    }
   } else {
     task->mark_complete();
 
@@ -547,7 +565,6 @@
                                                  PerfData::U_Ticks, CHECK);
   }
 
-
   if (UsePerfData) {
 
     EXCEPTION_MARK;
@@ -1302,6 +1319,11 @@
   return new_task;
 }
 
+// 1 second should be long enough to complete most JVMCI compilations
+// and not too long to stall a blocking JVMCI compilation that
+// is trying to acquire a lock held by the app thread that submitted the
+// compilation.
+static const long BLOCKING_JVMCI_COMPILATION_TIMEOUT = 1000;
 
 /**
  *  Wait for the compilation task to complete.
@@ -1318,30 +1340,47 @@
   thread->set_blocked_on_compilation(true);
 
   methodHandle method(thread, task->method());
+  bool free_task;
+#if INCLUDE_JVMCI
+  if (compiler(task->comp_level())->is_jvmci()) {
+    MutexLocker waiter(task->lock(), thread);
+    // No need to check if compilation has completed - just
+    // rely on the time out. The JVMCI compiler thread will
+    // recycle the CompileTask.
+    task->lock()->wait(!Mutex::_no_safepoint_check_flag, BLOCKING_JVMCI_COMPILATION_TIMEOUT);
+    // If the compilation completes while has_waiter is true then
+    // this thread is responsible for freeing the task.  Otherwise
+    // the compiler thread will free the task.
+    task->clear_waiter();
+    free_task = task->is_complete();
+  } else
+#endif
   {
     MutexLocker waiter(task->lock(), thread);
-
+    free_task = true;
     while (!task->is_complete() && !is_compilation_disabled_forever()) {
       task->lock()->wait();
     }
   }
 
   thread->set_blocked_on_compilation(false);
-  if (is_compilation_disabled_forever()) {
-    CompileTask::free(task);
-    return;
-  }
+  if (free_task) {
+    if (is_compilation_disabled_forever()) {
+      CompileTask::free(task);
+      return;
+    }
 
-  // It is harmless to check this status without the lock, because
-  // completion is a stable property (until the task object is recycled).
-  assert(task->is_complete(), "Compilation should have completed");
-  assert(task->code_handle() == NULL, "must be reset");
+    // It is harmless to check this status without the lock, because
+    // completion is a stable property (until the task object is recycled).
+    assert(task->is_complete(), "Compilation should have completed");
+    assert(task->code_handle() == NULL, "must be reset");
 
-  // By convention, the waiter is responsible for recycling a
-  // blocking CompileTask. Since there is only one waiter ever
-  // waiting on a CompileTask, we know that no one else will
-  // be using this CompileTask; we can free it.
-  CompileTask::free(task);
+    // By convention, the waiter is responsible for recycling a
+    // blocking CompileTask. Since there is only one waiter ever
+    // waiting on a CompileTask, we know that no one else will
+    // be using this CompileTask; we can free it.
+    CompileTask::free(task);
+  }
 }
 
 /**
@@ -1676,13 +1715,7 @@
   bool should_break = false;
   int task_level = task->comp_level();
 
-  // Look up matching directives
-  DirectiveSet* directive = DirectivesStack::getMatchingDirective(task->method(), compiler(task_level));
-
-  should_break = directive->BreakAtExecuteOption || task->check_break_at_flags();
-  if (should_log && !directive->LogOption) {
-    should_log = false;
-  }
+  DirectiveSet* directive;
   {
     // create the handle inside it's own block so it can't
     // accidentally be referenced once the thread transitions to
@@ -1691,12 +1724,20 @@
     methodHandle method(thread, task->method());
     assert(!method->is_native(), "no longer compile natives");
 
+    // Look up matching directives
+    directive = DirectivesStack::getMatchingDirective(method, compiler(task_level));
+
     // Save information about this method in case of failure.
     set_last_compile(thread, method, is_osr, task_level);
 
     DTRACE_METHOD_COMPILE_BEGIN_PROBE(method, compiler_name(task_level));
   }
 
+  should_break = directive->BreakAtExecuteOption || task->check_break_at_flags();
+  if (should_log && !directive->LogOption) {
+    should_log = false;
+  }
+
   // Allocate a new set of JNI handles.
   push_jni_handle_block();
   Method* target_handle = task->method();
@@ -1716,7 +1757,8 @@
     EventCompilation event;
 
     JVMCIEnv env(task, system_dictionary_modification_counter);
-    jvmci->compile_method(target_handle, osr_bci, &env);
+    methodHandle method(thread, target_handle);
+    jvmci->compile_method(method, osr_bci, &env);
 
     post_compile(thread, task, event, task->code() != NULL, NULL);
   } else

--- a/hotspot/src/share/vm/compiler/compileTask.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/compiler/compileTask.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -47,7 +47,7 @@
   } else {
     task = new CompileTask();
     DEBUG_ONLY(_num_allocated_tasks++;)
-    assert (WhiteBoxAPI || _num_allocated_tasks < 10000, "Leaking compilation tasks?");
+    assert (WhiteBoxAPI || JVMCI_ONLY(UseJVMCICompiler ||) _num_allocated_tasks < 10000, "Leaking compilation tasks?");
     task->set_next(NULL);
     task->set_is_free(true);
   }
@@ -90,6 +90,7 @@
   _method_holder = JNIHandles::make_global(method->method_holder()->klass_holder());
   _osr_bci = osr_bci;
   _is_blocking = is_blocking;
+  JVMCI_ONLY(_has_waiter = CompileBroker::compiler(comp_level)->is_jvmci();)
   _comp_level = comp_level;
   _num_inlined_bytecodes = 0;

--- a/hotspot/src/share/vm/compiler/compileTask.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/compiler/compileTask.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -53,6 +53,9 @@
   bool         _is_complete;
   bool         _is_success;
   bool         _is_blocking;
+#if INCLUDE_JVMCI
+  bool         _has_waiter;
+#endif
   int          _comp_level;
   int          _num_inlined_bytecodes;
   nmethodLocker* _code_handle;  // holder of eventual result
@@ -85,6 +88,10 @@
   bool         is_complete() const               { return _is_complete; }
   bool         is_blocking() const               { return _is_blocking; }
   bool         is_success() const                { return _is_success; }
+#if INCLUDE_JVMCI
+  bool         has_waiter() const                { return _has_waiter; }
+  void         clear_waiter()                    { _has_waiter = false; }
+#endif
 
   nmethodLocker* code_handle() const             { return _code_handle; }
   void         set_code_handle(nmethodLocker* l) { _code_handle = l; }

--- a/hotspot/src/share/vm/compiler/compilerDirectives.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/compiler/compilerDirectives.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -527,12 +527,14 @@
 
 DirectiveSet* DirectivesStack::getMatchingDirective(methodHandle method, AbstractCompiler *comp) {
   assert(_depth > 0, "Must never be empty");
-  CompilerDirectives* dir = _top;
-  assert(dir != NULL, "Must be initialized");
 
   DirectiveSet* match = NULL;
   {
     MutexLockerEx locker(DirectivesStack_lock, Mutex::_no_safepoint_check_flag);
+
+    CompilerDirectives* dir = _top;
+    assert(dir != NULL, "Must be initialized");
+
     while (dir != NULL) {
       if (dir->is_default_directive() || dir->match(method)) {
         match = dir->get_for(comp);

--- a/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -67,7 +67,7 @@
     cflags(VectorizeDebug,          bool, false, VectorizeDebug) \
     cflags(CloneMapDebug,           bool, false, CloneMapDebug) \
     cflags(DoReserveCopyInSuperWordDebug, bool, false, DoReserveCopyInSuperWordDebug) \
-    NOT_PRODUCT( cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel)) \
+    cflags(IGVPrintLevel,           intx, PrintIdealGraphLevel, IGVPrintLevel) \
     cflags(MaxNodeLimit,            intx, MaxNodeLimit, MaxNodeLimit)
 #else
   #define compilerdirectives_c2_flags(cflags)

--- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1148,7 +1148,6 @@
   }
   assert(new_obj != NULL, "just checking");
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
   if (TraceScavenge) {
@@ -1156,7 +1155,6 @@
        is_in_reserved(new_obj) ? "copying" : "tenuring",
        new_obj->klass()->internal_name(), p2i(old), p2i(new_obj), new_obj->size());
   }
-#endif
 
   if (forward_ptr == NULL) {
     oop obj_to_push = new_obj;

--- a/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -108,14 +108,11 @@
       if (m->is_marked()) { // Contains forwarding pointer.
         new_obj = ParNewGeneration::real_forwardee(obj);
         oopDesc::encode_store_heap_oop_not_null(p, new_obj);
-#ifndef PRODUCT
         if (TraceScavenge) {
           gclog_or_tty->print_cr("{%s %s ( " PTR_FORMAT " ) " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
              "forwarded ",
              new_obj->klass()->internal_name(), p2i(p), p2i((void *)obj), p2i((void *)new_obj), new_obj->size());
         }
-#endif
-
       } else {
         size_t obj_sz = obj->size_given_klass(objK);
         new_obj = _g->copy_to_survivor_space(_par_scan_state, obj, obj_sz, m);

--- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -430,7 +430,6 @@
     obj = obj->forwardee();
   }
 
-#ifndef PRODUCT
   if (TraceScavenge) {
     gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " (%d)}",
                            "promotion-failure",
@@ -438,7 +437,6 @@
                            p2i(obj), obj->size());
 
   }
-#endif
 
   return obj;
 }

--- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -260,7 +260,6 @@
     new_obj = o->forwardee();
   }
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
   if (TraceScavenge) {
@@ -268,7 +267,6 @@
        should_scavenge(&new_obj) ? "copying" : "tenuring",
        new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size());
   }
-#endif
 
   return new_obj;
 }
@@ -285,15 +283,13 @@
         ? o->forwardee()
         : copy_to_survivor_space<promote_immediately>(o);
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
-  if (TraceScavenge &&  o->is_forwarded()) {
+  if (TraceScavenge && o->is_forwarded()) {
     gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
        "forwarding",
        new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size());
   }
-#endif
 
   oopDesc::encode_store_heap_oop_not_null(p, new_obj);

--- a/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -138,7 +138,6 @@
     // If the klass has not been dirtied we know that there's
     // no references into  the young gen and we can skip it.
 
-#ifndef PRODUCT
     if (TraceScavenge) {
       ResourceMark rm;
       gclog_or_tty->print_cr("PSScavengeKlassClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
@@ -146,7 +145,6 @@
                              klass->external_name(),
                              klass->has_modified_oops() ? "true" : "false");
     }
-#endif
 
     if (klass->has_modified_oops()) {
       // Clean the klass since we're going to scavenge all the metadata.

--- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -134,7 +134,6 @@
 void FastScanClosure::do_oop(narrowOop* p) { FastScanClosure::do_oop_work(p); }
 
 void KlassScanClosure::do_klass(Klass* klass) {
-#ifndef PRODUCT
   if (TraceScavenge) {
     ResourceMark rm;
     gclog_or_tty->print_cr("KlassScanClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
@@ -142,7 +141,6 @@
                            klass->external_name(),
                            klass->has_modified_oops() ? "true" : "false");
   }
-#endif
 
   // If the klass has not been dirtied we know that there's
   // no references into  the young gen and we can skip it.

--- a/hotspot/src/share/vm/interpreter/interpreter.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/interpreter/interpreter.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -300,7 +300,10 @@
   }
 
   // Accessor method?
-  if (m->is_accessor()) {
+  if (m->is_getter()) {
+    // TODO: We should have used ::is_accessor above, but fast accessors in Zero expect only getters.
+    // See CppInterpreter::accessor_entry in cppInterpreter_zero.cpp. This should be fixed in Zero,
+    // then the call above updated to ::is_accessor
     assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
     return accessor;
   }

--- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -71,62 +71,97 @@
   return CompilerToVM::asMethod(hotspot_method);
 }
 
-VMReg getVMRegFromLocation(oop location, int total_frame_size) {
-  oop reg = code_Location::reg(location);
+VMReg getVMRegFromLocation(Handle location, int total_frame_size, TRAPS) {
+  if (location.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+
+  Handle reg = code_Location::reg(location);
   jint offset = code_Location::offset(location);
 
-  if (reg != NULL) {
+  if (reg.not_null()) {
     // register
     jint number = code_Register::number(reg);
-    VMReg vmReg = CodeInstaller::get_hotspot_reg(number);
-    assert(offset % 4 == 0, "must be aligned");
-    return vmReg->next(offset / 4);
+    VMReg vmReg = CodeInstaller::get_hotspot_reg(number, CHECK_NULL);
+    if (offset % 4 == 0) {
+      return vmReg->next(offset / 4);
+    } else {
+      JVMCI_ERROR_NULL("unaligned subregister offset %d in oop map", offset);
+    }
   } else {
     // stack slot
-    assert(offset % 4 == 0, "must be aligned");
-    return VMRegImpl::stack2reg(offset / 4);
+    if (offset % 4 == 0) {
+      return VMRegImpl::stack2reg(offset / 4);
+    } else {
+      JVMCI_ERROR_NULL("unaligned stack offset %d in oop map", offset);
+    }
   }
 }
 
 // creates a HotSpot oop map out of the byte arrays provided by DebugInfo
-OopMap* CodeInstaller::create_oop_map(oop debug_info) {
-  oop reference_map = DebugInfo::referenceMap(debug_info);
+OopMap* CodeInstaller::create_oop_map(Handle debug_info, TRAPS) {
+  Handle reference_map = DebugInfo::referenceMap(debug_info);
+  if (reference_map.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (!reference_map->is_a(HotSpotReferenceMap::klass())) {
+    JVMCI_ERROR_NULL("unknown reference map: %s", reference_map->klass()->signature_name());
+  }
   if (HotSpotReferenceMap::maxRegisterSize(reference_map) > 16) {
     _has_wide_vector = true;
   }
   OopMap* map = new OopMap(_total_frame_size, _parameter_count);
-  objArrayOop objects = HotSpotReferenceMap::objects(reference_map);
-  objArrayOop derivedBase = HotSpotReferenceMap::derivedBase(reference_map);
-  typeArrayOop sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map);
+  objArrayHandle objects = HotSpotReferenceMap::objects(reference_map);
+  objArrayHandle derivedBase = HotSpotReferenceMap::derivedBase(reference_map);
+  typeArrayHandle sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map);
+  if (objects.is_null() || derivedBase.is_null() || sizeInBytes.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (objects->length() != derivedBase->length() || objects->length() != sizeInBytes->length()) {
+    JVMCI_ERROR_NULL("arrays in reference map have different sizes: %d %d %d", objects->length(), derivedBase->length(), sizeInBytes->length());
+  }
   for (int i = 0; i < objects->length(); i++) {
-    oop location = objects->obj_at(i);
-    oop baseLocation = derivedBase->obj_at(i);
+    Handle location = objects->obj_at(i);
+    Handle baseLocation = derivedBase->obj_at(i);
     int bytes = sizeInBytes->int_at(i);
 
-    VMReg vmReg = getVMRegFromLocation(location, _total_frame_size);
-    if (baseLocation != NULL) {
+    VMReg vmReg = getVMRegFromLocation(location, _total_frame_size, CHECK_NULL);
+    if (baseLocation.not_null()) {
       // derived oop
-      assert(bytes == 8, "derived oop can't be compressed");
-      VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size);
-      map->set_derived_oop(vmReg, baseReg);
+#ifdef _LP64
+      if (bytes == 8) {
+#else
+      if (bytes == 4) {
+#endif
+        VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size, CHECK_NULL);
+        map->set_derived_oop(vmReg, baseReg);
+      } else {
+        JVMCI_ERROR_NULL("invalid derived oop size in ReferenceMap: %d", bytes);
+      }
+#ifdef _LP64
     } else if (bytes == 8) {
       // wide oop
       map->set_oop(vmReg);
-    } else {
+    } else if (bytes == 4) {
       // narrow oop
-      assert(bytes == 4, "wrong size");
       map->set_narrowoop(vmReg);
+#else
+    } else if (bytes == 4) {
+      map->set_oop(vmReg);
+#endif
+    } else {
+      JVMCI_ERROR_NULL("invalid oop size in ReferenceMap: %d", bytes);
     }
   }
 
-  oop callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info);
-  if (callee_save_info != NULL) {
-    objArrayOop registers = RegisterSaveLayout::registers(callee_save_info);
-    typeArrayOop slots = RegisterSaveLayout::slots(callee_save_info);
+  Handle callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info);
+  if (callee_save_info.not_null()) {
+    objArrayHandle registers = RegisterSaveLayout::registers(callee_save_info);
+    typeArrayHandle slots = RegisterSaveLayout::slots(callee_save_info);
     for (jint i = 0; i < slots->length(); i++) {
-      oop jvmci_reg = registers->obj_at(i);
+      Handle jvmci_reg = registers->obj_at(i);
       jint jvmci_reg_number = code_Register::number(jvmci_reg);
-      VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number);
+      VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number, CHECK_NULL);
       // HotSpot stack slots are 4 bytes
       jint jvmci_slot = slots->int_at(i);
       jint hotspot_slot = jvmci_slot * VMRegImpl::slots_per_word;
@@ -142,7 +177,7 @@
   return map;
 }
 
-Metadata* CodeInstaller::record_metadata_reference(Handle& constant) {
+Metadata* CodeInstaller::record_metadata_reference(Handle constant, TRAPS) {
   oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant);
   if (obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) {
     Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj));
@@ -157,16 +192,18 @@
     TRACE_jvmci_3("metadata[%d of %d] = %s", index, _oop_recorder->metadata_count(), method->name()->as_C_string());
     return method;
   } else {
-    fatal("unexpected metadata reference for constant of type %s", obj->klass()->name()->as_C_string());
-    return NULL;
+    JVMCI_ERROR_NULL("unexpected metadata reference for constant of type %s", obj->klass()->signature_name());
   }
 }
 
 #ifdef _LP64
-narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle& constant) {
+narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle constant, TRAPS) {
   oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant);
   assert(HotSpotMetaspaceConstantImpl::compressed(constant), "unexpected uncompressed pointer");
-  assert(obj->is_a(HotSpotResolvedObjectTypeImpl::klass()), "unexpected compressed pointer of type %s", obj->klass()->name()->as_C_string());
+
+  if (!obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) {
+    JVMCI_ERROR_0("unexpected compressed pointer of type %s", obj->klass()->signature_name());
+  }
 
   Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj));
   int index = _oop_recorder->find_index(klass);
@@ -175,9 +212,9 @@
 }
 #endif
 
-Location::Type CodeInstaller::get_oop_type(oop value) {
-  oop lirKind = Value::lirKind(value);
-  oop platformKind = LIRKind::platformKind(lirKind);
+Location::Type CodeInstaller::get_oop_type(Handle value) {
+  Handle lirKind = Value::lirKind(value);
+  Handle platformKind = LIRKind::platformKind(lirKind);
   assert(LIRKind::referenceMask(lirKind) == 1, "unexpected referenceMask");
 
   if (platformKind == word_kind()) {
@@ -187,24 +224,29 @@
   }
 }
 
-ScopeValue* CodeInstaller::get_scope_value(oop value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second) {
+ScopeValue* CodeInstaller::get_scope_value(Handle value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second, TRAPS) {
   second = NULL;
-  if (value == Value::ILLEGAL()) {
-    assert(type == T_ILLEGAL, "expected legal value");
+  if (value.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  } else if (value == Value::ILLEGAL()) {
+    if (type != T_ILLEGAL) {
+      JVMCI_ERROR_NULL("unexpected illegal value, expected %s", basictype_to_str(type));
+    }
     return _illegal_value;
   } else if (value->is_a(RegisterValue::klass())) {
-    oop reg = RegisterValue::reg(value);
+    Handle reg = RegisterValue::reg(value);
     jint number = code_Register::number(reg);
-    VMReg hotspotRegister = get_hotspot_reg(number);
+    VMReg hotspotRegister = get_hotspot_reg(number, CHECK_NULL);
     if (is_general_purpose_reg(hotspotRegister)) {
       Location::Type locationType;
       if (type == T_OBJECT) {
         locationType = get_oop_type(value);
       } else if (type == T_LONG) {
         locationType = Location::lng;
+      } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN) {
+        locationType = Location::int_in_long;
       } else {
-        assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in cpu register");
-        locationType = Location::int_in_long;
+        JVMCI_ERROR_NULL("unexpected type %s in cpu register", basictype_to_str(type));
       }
       ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister));
       if (type == T_LONG) {
@@ -212,13 +254,14 @@
       }
       return value;
     } else {
-      assert(type == T_FLOAT || type == T_DOUBLE, "only float and double expected in xmm register");
       Location::Type locationType;
       if (type == T_FLOAT) {
         // this seems weird, but the same value is used in c1_LinearScan
         locationType = Location::normal;
+      } else if (type == T_DOUBLE) {
+        locationType = Location::dbl;
       } else {
-        locationType = Location::dbl;
+        JVMCI_ERROR_NULL("unexpected type %s in floating point register", basictype_to_str(type));
       }
       ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister));
       if (type == T_DOUBLE) {
@@ -239,9 +282,10 @@
       locationType = Location::lng;
     } else if (type == T_DOUBLE) {
       locationType = Location::dbl;
+    } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN) {
+      locationType = Location::normal;
     } else {
-      assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in stack slot");
-      locationType = Location::normal;
+      JVMCI_ERROR_NULL("unexpected type %s in stack slot", basictype_to_str(type));
     }
     ScopeValue* value = new LocationValue(Location::new_stk_loc(locationType, offset));
     if (type == T_DOUBLE || type == T_LONG) {
@@ -254,7 +298,10 @@
         jlong prim = PrimitiveConstant::primitive(value);
         return new ConstantLongValue(prim);
       } else {
-        assert(type == JVMCIRuntime::kindToBasicType(JavaKind::typeChar(PrimitiveConstant::kind(value))), "primitive constant type doesn't match");
+        BasicType constantType = JVMCIRuntime::kindToBasicType(PrimitiveConstant::kind(value), CHECK_NULL);
+        if (type != constantType) {
+          JVMCI_ERROR_NULL("primitive constant type doesn't match, expected %s but got %s", basictype_to_str(type), basictype_to_str(constantType));
+        }
         if (type == T_INT || type == T_FLOAT) {
           jint prim = (jint)PrimitiveConstant::primitive(value);
           switch (prim) {
@@ -264,53 +311,63 @@
             case  2: return _int_2_scope_value;
             default: return new ConstantIntValue(prim);
           }
-        } else {
-          assert(type == T_LONG || type == T_DOUBLE, "unexpected primitive constant type");
+        } else if (type == T_LONG || type == T_DOUBLE) {
           jlong prim = PrimitiveConstant::primitive(value);
           second = _int_1_scope_value;
           return new ConstantLongValue(prim);
+        } else {
+          JVMCI_ERROR_NULL("unexpected primitive constant type %s", basictype_to_str(type));
         }
       }
-    } else {
-      assert(type == T_OBJECT, "unexpected object constant");
-      if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) {
+    } else if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) {
+      if (type == T_OBJECT) {
         return _oop_null_scope_value;
       } else {
-        assert(value->is_a(HotSpotObjectConstantImpl::klass()), "unexpected constant type");
+        JVMCI_ERROR_NULL("unexpected null constant, expected %s", basictype_to_str(type));
+      }
+    } else if (value->is_a(HotSpotObjectConstantImpl::klass())) {
+      if (type == T_OBJECT) {
         oop obj = HotSpotObjectConstantImpl::object(value);
-        assert(obj != NULL, "null value must be in NullConstant");
+        if (obj == NULL) {
+          JVMCI_ERROR_NULL("null value must be in NullConstant");
+        }
         return new ConstantOopWriteValue(JNIHandles::make_local(obj));
+      } else {
+        JVMCI_ERROR_NULL("unexpected object constant, expected %s", basictype_to_str(type));
       }
     }
   } else if (value->is_a(VirtualObject::klass())) {
-    assert(type == T_OBJECT, "unexpected virtual object");
-    int id = VirtualObject::id(value);
-    ScopeValue* object = objects->at(id);
-    assert(object != NULL, "missing value");
-    return object;
-  } else {
-    value->klass()->print();
-    value->print();
+    if (type == T_OBJECT) {
+      int id = VirtualObject::id(value);
+      if (0 <= id && id < objects->length()) {
+        ScopeValue* object = objects->at(id);
+        if (object != NULL) {
+          return object;
+        }
+      }
+      JVMCI_ERROR_NULL("unknown virtual object id %d", id);
+    } else {
+      JVMCI_ERROR_NULL("unexpected virtual object, expected %s", basictype_to_str(type));
+    }
   }
-  ShouldNotReachHere();
-  return NULL;
+
+  JVMCI_ERROR_NULL("unexpected value in scope: %s", value->klass()->signature_name())
 }
 
-void CodeInstaller::record_object_value(ObjectValue* sv, oop value, GrowableArray<ScopeValue*>* objects) {
-  oop type = VirtualObject::type(value);
+void CodeInstaller::record_object_value(ObjectValue* sv, Handle value, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  Handle type = VirtualObject::type(value);
   int id = VirtualObject::id(value);
   oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type);
   Klass* klass = java_lang_Class::as_Klass(javaMirror);
   bool isLongArray = klass == Universe::longArrayKlassObj();
 
-  objArrayOop values = VirtualObject::values(value);
-  objArrayOop slotKinds = VirtualObject::slotKinds(value);
+  objArrayHandle values = VirtualObject::values(value);
+  objArrayHandle slotKinds = VirtualObject::slotKinds(value);
   for (jint i = 0; i < values->length(); i++) {
     ScopeValue* cur_second = NULL;
-    oop object = values->obj_at(i);
-    oop kind = slotKinds->obj_at(i);
-    BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-    ScopeValue* value = get_scope_value(object, type, objects, cur_second);
+    Handle object = values->obj_at(i);
+    BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+    ScopeValue* value = get_scope_value(object, type, objects, cur_second, CHECK);
 
     if (isLongArray && cur_second == NULL) {
       // we're trying to put ints into a long array... this isn't really valid, but it's used for some optimizations.
@@ -326,14 +383,19 @@
   }
 }
 
-MonitorValue* CodeInstaller::get_monitor_value(oop value, GrowableArray<ScopeValue*>* objects) {
-  guarantee(value->is_a(StackLockValue::klass()), "Monitors must be of type StackLockValue");
+MonitorValue* CodeInstaller::get_monitor_value(Handle value, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  if (value.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (!value->is_a(StackLockValue::klass())) {
+    JVMCI_ERROR_NULL("Monitors must be of type StackLockValue, got %s", value->klass()->signature_name());
+  }
 
   ScopeValue* second = NULL;
-  ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second);
+  ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second, CHECK_NULL);
   assert(second == NULL, "monitor cannot occupy two stack slots");
 
-  ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second);
+  ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second, CHECK_NULL);
   assert(second == lock_data_value, "monitor is LONG value that occupies two stack slots");
   assert(lock_data_value->is_location(), "invalid monitor location");
   Location lock_data_loc = ((LocationValue*)lock_data_value)->location();
@@ -346,7 +408,7 @@
   return new MonitorValue(owner_value, lock_data_loc, eliminated);
 }
 
-void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder) {
+void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder, TRAPS) {
   JavaThread* thread = JavaThread::current();
   CompilerThread* compilerThread = thread->is_Compiler_thread() ? thread->as_CompilerThread() : NULL;
   _oop_recorder = recorder;
@@ -368,8 +430,7 @@
         } else if (assumption->klass() == Assumptions_CallSiteTargetValue::klass()) {
           assumption_CallSiteTargetValue(assumption);
         } else {
-          assumption->print();
-          fatal("unexpected Assumption subclass");
+          JVMCI_ERROR("unexpected Assumption subclass %s", assumption->klass()->signature_name());
         }
       }
     }
@@ -414,18 +475,19 @@
   _size = bytes;
 }
 
-JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata) {
+JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS) {
   CodeBuffer buffer("JVMCI Compiler CodeBuffer for Metadata");
   jobject compiled_code_obj = JNIHandles::make_local(compiled_code());
-  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL);
+  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL, CHECK_OK);
 
   // Get instructions and constants CodeSections early because we need it.
   _instructions = buffer.insts();
   _constants = buffer.consts();
 
-  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj));
-  if (!initialize_buffer(buffer)) {
-    return JVMCIEnv::code_too_large;
+  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK);
+  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK);
+  if (result != JVMCIEnv::ok) {
+    return result;
   }
   process_exception_handlers();
 
@@ -446,18 +508,18 @@
 }
 
 // constructor used to create a method
-JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log) {
+JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS) {
   CodeBuffer buffer("JVMCI Compiler CodeBuffer");
   jobject compiled_code_obj = JNIHandles::make_local(compiled_code());
   OopRecorder* recorder = new OopRecorder(&_arena, true);
-  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder);
+  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder, CHECK_OK);
 
   // Get instructions and constants CodeSections early because we need it.
   _instructions = buffer.insts();
   _constants = buffer.consts();
 
-  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj));
-  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer);
+  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK);
+  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK);
   if (result != JVMCIEnv::ok) {
     return result;
   }
@@ -500,7 +562,7 @@
   return result;
 }
 
-void CodeInstaller::initialize_fields(oop target, oop compiled_code) {
+void CodeInstaller::initialize_fields(oop target, oop compiled_code, TRAPS) {
   if (compiled_code->is_a(HotSpotCompiledNmethod::klass())) {
     Handle hotspotJavaMethod = HotSpotCompiledNmethod::method(compiled_code);
     methodHandle method = getMethodFromHotSpotMethod(hotspotJavaMethod());
@@ -521,7 +583,9 @@
 
   // Pre-calculate the constants section size.  This is required for PC-relative addressing.
   _data_section_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSection(compiled_code));
-  guarantee(HotSpotCompiledCode::dataSectionAlignment(compiled_code) <= _constants->alignment(), "Alignment inside constants section is restricted by alignment of section begin");
+  if ((_constants->alignment() % HotSpotCompiledCode::dataSectionAlignment(compiled_code)) != 0) {
+    JVMCI_ERROR("invalid data section alignment: %d", HotSpotCompiledCode::dataSectionAlignment(compiled_code));
+  }
   _constants_size = data_section()->length();
 
   _data_section_patches_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSectionPatches(compiled_code));
@@ -538,16 +602,18 @@
   _word_kind_handle = JNIHandles::make_local(Architecture::wordKind(arch));
 }
 
-int CodeInstaller::estimate_stubs_size() {
+int CodeInstaller::estimate_stubs_size(TRAPS) {
   // Estimate the number of static call stubs that might be emitted.
   int static_call_stubs = 0;
   objArrayOop sites = this->sites();
   for (int i = 0; i < sites->length(); i++) {
     oop site = sites->obj_at(i);
-    if (site->is_a(CompilationResult_Mark::klass())) {
+    if (site != NULL && site->is_a(CompilationResult_Mark::klass())) {
       oop id_obj = CompilationResult_Mark::id(site);
       if (id_obj != NULL) {
-        assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected");
+        if (!java_lang_boxing_object::is_instance(id_obj, T_INT)) {
+          JVMCI_ERROR_0("expected Integer id, got %s", id_obj->klass()->signature_name());
+        }
         jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT));
         if (id == INVOKESTATIC || id == INVOKESPECIAL) {
           static_call_stubs++;
@@ -559,7 +625,7 @@
 }
 
 // perform data and call relocation on the CodeBuffer
-JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer) {
+JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer, TRAPS) {
   HandleMark hm;
   objArrayHandle sites = this->sites();
   int locs_buffer_size = sites->length() * (relocInfo::length_limit + sizeof(relocInfo));
@@ -568,7 +634,7 @@
   // stubs.  Stubs have extra relocs but they are managed by the stub
   // section itself so they don't need to be accounted for in the
   // locs_buffer above.
-  int stubs_size = estimate_stubs_size();
+  int stubs_size = estimate_stubs_size(CHECK_OK);
   int total_size = round_to(_code_size, buffer.insts()->alignment()) + round_to(_constants_size, buffer.consts()->alignment()) + round_to(stubs_size, buffer.stubs()->alignment());
 
   if (total_size > JVMCINMethodSizeLimit) {
@@ -600,19 +666,30 @@
 
   for (int i = 0; i < data_section_patches()->length(); i++) {
     Handle patch = data_section_patches()->obj_at(i);
+    if (patch.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
     Handle reference = CompilationResult_DataPatch::reference(patch);
-    assert(reference->is_a(CompilationResult_ConstantReference::klass()), "patch in data section must be a ConstantReference");
+    if (reference.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
+    if (!reference->is_a(CompilationResult_ConstantReference::klass())) {
+      JVMCI_ERROR_OK("invalid patch in data section: %s", reference->klass()->signature_name());
+    }
     Handle constant = CompilationResult_ConstantReference::constant(reference);
+    if (constant.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
     address dest = _constants->start() + CompilationResult_Site::pcOffset(patch);
     if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) {
       if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
-        *((narrowKlass*) dest) = record_narrow_metadata_reference(constant);
+        *((narrowKlass*) dest) = record_narrow_metadata_reference(constant, CHECK_OK);
 #else
-        fatal("unexpected compressed Klass* in 32-bit mode");
+        JVMCI_ERROR_OK("unexpected compressed Klass* in 32-bit mode");
 #endif
       } else {
-        *((Metadata**) dest) = record_metadata_reference(constant);
+        *((Metadata**) dest) = record_metadata_reference(constant, CHECK_OK);
       }
     } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
       Handle obj = HotSpotObjectConstantImpl::object(constant);
@@ -623,48 +700,49 @@
 #ifdef _LP64
         _constants->relocate(dest, oop_Relocation::spec(oop_index), relocInfo::narrow_oop_in_const);
 #else
-        fatal("unexpected compressed oop in 32-bit mode");
+        JVMCI_ERROR_OK("unexpected compressed oop in 32-bit mode");
 #endif
       } else {
         _constants->relocate(dest, oop_Relocation::spec(oop_index));
       }
     } else {
-      ShouldNotReachHere();
+      JVMCI_ERROR_OK("invalid constant in data section: %s", constant->klass()->signature_name());
     }
   }
   jint last_pc_offset = -1;
   for (int i = 0; i < sites->length(); i++) {
-    {
-        No_Safepoint_Verifier no_safepoint;
-        oop site = sites->obj_at(i);
-        jint pc_offset = CompilationResult_Site::pcOffset(site);
+    Handle site = sites->obj_at(i);
+    if (site.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
+
+    jint pc_offset = CompilationResult_Site::pcOffset(site);
 
-        if (site->is_a(CompilationResult_Call::klass())) {
-          TRACE_jvmci_4("call at %i", pc_offset);
-          site_Call(buffer, pc_offset, site);
-        } else if (site->is_a(CompilationResult_Infopoint::klass())) {
-          // three reasons for infopoints denote actual safepoints
-          oop reason = CompilationResult_Infopoint::reason(site);
-          if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) {
-            TRACE_jvmci_4("safepoint at %i", pc_offset);
-            site_Safepoint(buffer, pc_offset, site);
-          } else {
-            // if the infopoint is not an actual safepoint, it must have one of the other reasons
-            // (safeguard against new safepoint types that require handling above)
-            assert(InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason, "");
-            site_Infopoint(buffer, pc_offset, site);
-          }
-        } else if (site->is_a(CompilationResult_DataPatch::klass())) {
-          TRACE_jvmci_4("datapatch at %i", pc_offset);
-          site_DataPatch(buffer, pc_offset, site);
-        } else if (site->is_a(CompilationResult_Mark::klass())) {
-          TRACE_jvmci_4("mark at %i", pc_offset);
-          site_Mark(buffer, pc_offset, site);
-        } else {
-          fatal("unexpected Site subclass");
-        }
-        last_pc_offset = pc_offset;
+    if (site->is_a(CompilationResult_Call::klass())) {
+      TRACE_jvmci_4("call at %i", pc_offset);
+      site_Call(buffer, pc_offset, site, CHECK_OK);
+    } else if (site->is_a(CompilationResult_Infopoint::klass())) {
+      // three reasons for infopoints denote actual safepoints
+      oop reason = CompilationResult_Infopoint::reason(site);
+      if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) {
+        TRACE_jvmci_4("safepoint at %i", pc_offset);
+        site_Safepoint(buffer, pc_offset, site, CHECK_OK);
+      } else if (InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason) {
+        site_Infopoint(buffer, pc_offset, site, CHECK_OK);
+      } else {
+        JVMCI_ERROR_OK("unknown infopoint reason at %i", pc_offset);
+      }
+    } else if (site->is_a(CompilationResult_DataPatch::klass())) {
+      TRACE_jvmci_4("datapatch at %i", pc_offset);
+      site_DataPatch(buffer, pc_offset, site, CHECK_OK);
+    } else if (site->is_a(CompilationResult_Mark::klass())) {
+      TRACE_jvmci_4("mark at %i", pc_offset);
+      site_Mark(buffer, pc_offset, site, CHECK_OK);
+    } else {
+      JVMCI_ERROR_OK("unexpected site subclass: %s", site->klass()->signature_name());
     }
+    last_pc_offset = pc_offset;
+
     if (CodeInstallSafepointChecks && SafepointSynchronize::do_call_back()) {
       // this is a hacky way to force a safepoint check but nothing else was jumping out at me.
       ThreadToNativeFromVM ttnfv(JavaThread::current());
@@ -673,7 +751,6 @@
 
 #ifndef PRODUCT
   if (comments() != NULL) {
-    No_Safepoint_Verifier no_safepoint;
     for (int i = 0; i < comments()->length(); i++) {
       oop comment = comments()->obj_at(i);
       assert(comment->is_a(HotSpotCompiledCode_Comment::klass()), "cce");
@@ -759,56 +836,61 @@
   return true;
 }
 
-GrowableArray<ScopeValue*>* CodeInstaller::record_virtual_objects(oop debug_info) {
-  objArrayOop virtualObjects = DebugInfo::virtualObjectMapping(debug_info);
-  if (virtualObjects == NULL) {
+GrowableArray<ScopeValue*>* CodeInstaller::record_virtual_objects(Handle debug_info, TRAPS) {
+  objArrayHandle virtualObjects = DebugInfo::virtualObjectMapping(debug_info);
+  if (virtualObjects.is_null()) {
     return NULL;
   }
   GrowableArray<ScopeValue*>* objects = new GrowableArray<ScopeValue*>(virtualObjects->length(), virtualObjects->length(), NULL);
   // Create the unique ObjectValues
   for (int i = 0; i < virtualObjects->length(); i++) {
-    oop value = virtualObjects->obj_at(i);
+    Handle value = virtualObjects->obj_at(i);
     int id = VirtualObject::id(value);
-    oop type = VirtualObject::type(value);
+    Handle type = VirtualObject::type(value);
     oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type);
     ObjectValue* sv = new ObjectValue(id, new ConstantOopWriteValue(JNIHandles::make_local(Thread::current(), javaMirror)));
-    assert(objects->at(id) == NULL, "once");
+    if (id < 0 || id >= objects->length()) {
+      JVMCI_ERROR_NULL("virtual object id %d out of bounds", id);
+    }
+    if (objects->at(id) != NULL) {
+      JVMCI_ERROR_NULL("duplicate virtual object id %d", id);
+    }
     objects->at_put(id, sv);
   }
   // All the values which could be referenced by the VirtualObjects
   // exist, so now describe all the VirtualObjects themselves.
   for (int i = 0; i < virtualObjects->length(); i++) {
-    oop value = virtualObjects->obj_at(i);
+    Handle value = virtualObjects->obj_at(i);
     int id = VirtualObject::id(value);
-    record_object_value(objects->at(id)->as_ObjectValue(), value, objects);
+    record_object_value(objects->at(id)->as_ObjectValue(), value, objects, CHECK_NULL);
   }
   _debug_recorder->dump_object_pool(objects);
   return objects;
 }
 
-void CodeInstaller::record_scope(jint pc_offset, oop debug_info) {
-  oop position = DebugInfo::bytecodePosition(debug_info);
-  if (position == NULL) {
+void CodeInstaller::record_scope(jint pc_offset, Handle debug_info, TRAPS) {
+  Handle position = DebugInfo::bytecodePosition(debug_info);
+  if (position.is_null()) {
     // Stubs do not record scope info, just oop maps
     return;
   }
 
-  GrowableArray<ScopeValue*>* objectMapping = record_virtual_objects(debug_info);
-  record_scope(pc_offset, position, objectMapping);
+  GrowableArray<ScopeValue*>* objectMapping = record_virtual_objects(debug_info, CHECK);
+  record_scope(pc_offset, position, objectMapping, CHECK);
 }
 
-void CodeInstaller::record_scope(jint pc_offset, oop position, GrowableArray<ScopeValue*>* objects) {
-  oop frame = NULL;
+void CodeInstaller::record_scope(jint pc_offset, Handle position, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  Handle frame;
   if (position->is_a(BytecodeFrame::klass())) {
     frame = position;
   }
-  oop caller_frame = BytecodePosition::caller(position);
-  if (caller_frame != NULL) {
-    record_scope(pc_offset, caller_frame, objects);
+  Handle caller_frame = BytecodePosition::caller(position);
+  if (caller_frame.not_null()) {
+    record_scope(pc_offset, caller_frame, objects, CHECK);
   }
 
-  oop hotspot_method = BytecodePosition::method(position);
-  Method* method = getMethodFromHotSpotMethod(hotspot_method);
+  Handle hotspot_method = BytecodePosition::method(position);
+  Method* method = getMethodFromHotSpotMethod(hotspot_method());
   jint bci = BytecodePosition::bci(position);
   if (bci == BytecodeFrame::BEFORE_BCI()) {
     bci = SynchronizationEntryBCI;
@@ -817,13 +899,13 @@
   TRACE_jvmci_2("Recording scope pc_offset=%d bci=%d method=%s", pc_offset, bci, method->name_and_sig_as_C_string());
 
   bool reexecute = false;
-  if (frame != NULL) {
+  if (frame.not_null()) {
     if (bci == SynchronizationEntryBCI){
        reexecute = false;
     } else {
       Bytecodes::Code code = Bytecodes::java_code_at(method, method->bcp_from(bci));
       reexecute = bytecode_should_reexecute(code);
-      if (frame != NULL) {
+      if (frame.not_null()) {
         reexecute = (BytecodeFrame::duringCall(frame) == JNI_FALSE);
       }
     }
@@ -834,15 +916,22 @@
   DebugToken* monitors_token = NULL;
   bool throw_exception = false;
 
-  if (frame != NULL) {
+  if (frame.not_null()) {
     jint local_count = BytecodeFrame::numLocals(frame);
     jint expression_count = BytecodeFrame::numStack(frame);
     jint monitor_count = BytecodeFrame::numLocks(frame);
-    objArrayOop values = BytecodeFrame::values(frame);
-    objArrayOop slotKinds = BytecodeFrame::slotKinds(frame);
+    objArrayHandle values = BytecodeFrame::values(frame);
+    objArrayHandle slotKinds = BytecodeFrame::slotKinds(frame);
 
-    assert(local_count + expression_count + monitor_count == values->length(), "unexpected values length");
-    assert(local_count + expression_count == slotKinds->length(), "unexpected slotKinds length");
+    if (values.is_null() || slotKinds.is_null()) {
+      THROW(vmSymbols::java_lang_NullPointerException());
+    }
+    if (local_count + expression_count + monitor_count != values->length()) {
+      JVMCI_ERROR("unexpected values length %d in scope (%d locals, %d expressions, %d monitors)", values->length(), local_count, expression_count, monitor_count);
+    }
+    if (local_count + expression_count != slotKinds->length()) {
+      JVMCI_ERROR("unexpected slotKinds length %d in scope (%d locals, %d expressions)", slotKinds->length(), local_count, expression_count);
+    }
 
     GrowableArray<ScopeValue*>* locals = local_count > 0 ? new GrowableArray<ScopeValue*> (local_count) : NULL;
     GrowableArray<ScopeValue*>* expressions = expression_count > 0 ? new GrowableArray<ScopeValue*> (expression_count) : NULL;
@@ -853,30 +942,30 @@
 
     for (jint i = 0; i < values->length(); i++) {
       ScopeValue* second = NULL;
-      oop value = values->obj_at(i);
+      Handle value = values->obj_at(i);
       if (i < local_count) {
-        oop kind = slotKinds->obj_at(i);
-        BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-        ScopeValue* first = get_scope_value(value, type, objects, second);
+        BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+        ScopeValue* first = get_scope_value(value, type, objects, second, CHECK);
         if (second != NULL) {
           locals->append(second);
         }
         locals->append(first);
       } else if (i < local_count + expression_count) {
-        oop kind = slotKinds->obj_at(i);
-        BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-        ScopeValue* first = get_scope_value(value, type, objects, second);
+        BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+        ScopeValue* first = get_scope_value(value, type, objects, second, CHECK);
         if (second != NULL) {
           expressions->append(second);
         }
         expressions->append(first);
       } else {
-        monitors->append(get_monitor_value(value, objects));
+        MonitorValue *monitor = get_monitor_value(value, objects, CHECK);
+        monitors->append(monitor);
       }
       if (second != NULL) {
         i++;
-        assert(i < values->length(), "double-slot value not followed by Value.ILLEGAL");
-        assert(values->obj_at(i) == Value::ILLEGAL(), "double-slot value not followed by Value.ILLEGAL");
+        if (i >= values->length() || values->obj_at(i) != Value::ILLEGAL()) {
+          JVMCI_ERROR("double-slot value not followed by Value.ILLEGAL");
+        }
       }
     }
 
@@ -891,32 +980,37 @@
                                   locals_token, expressions_token, monitors_token);
 }
 
-void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop debug_info = CompilationResult_Infopoint::debugInfo(site);
-  assert(debug_info != NULL, "debug info expected");
+void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle debug_info = CompilationResult_Infopoint::debugInfo(site);
+  if (debug_info.is_null()) {
+    JVMCI_ERROR("debug info expected at safepoint at %i", pc_offset);
+  }
 
   // address instruction = _instructions->start() + pc_offset;
   // jint next_pc_offset = Assembler::locate_next_instruction(instruction) - _instructions->start();
-  _debug_recorder->add_safepoint(pc_offset, create_oop_map(debug_info));
-  record_scope(pc_offset, debug_info);
+  OopMap *map = create_oop_map(debug_info, CHECK);
+  _debug_recorder->add_safepoint(pc_offset, map);
+  record_scope(pc_offset, debug_info, CHECK);
   _debug_recorder->end_safepoint(pc_offset);
 }
 
-void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop debug_info = CompilationResult_Infopoint::debugInfo(site);
-  assert(debug_info != NULL, "debug info expected");
+void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle debug_info = CompilationResult_Infopoint::debugInfo(site);
+  if (debug_info.is_null()) {
+    JVMCI_ERROR("debug info expected at infopoint at %i", pc_offset);
+  }
 
   _debug_recorder->add_non_safepoint(pc_offset);
-  record_scope(pc_offset, debug_info);
+  record_scope(pc_offset, debug_info, CHECK);
   _debug_recorder->end_non_safepoint(pc_offset);
 }
 
-void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop target = CompilationResult_Call::target(site);
+void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle target = CompilationResult_Call::target(site);
   InstanceKlass* target_klass = InstanceKlass::cast(target->klass());
 
-  oop hotspot_method = NULL; // JavaMethod
-  oop foreign_call = NULL;
+  Handle hotspot_method; // JavaMethod
+  Handle foreign_call;
 
   if (target_klass->is_subclass_of(SystemDictionary::HotSpotForeignCallTarget_klass())) {
     foreign_call = target;
@@ -924,27 +1018,29 @@
     hotspot_method = target;
   }
 
-  oop debug_info = CompilationResult_Call::debugInfo(site);
+  Handle debug_info = CompilationResult_Call::debugInfo(site);
 
-  assert(!!hotspot_method ^ !!foreign_call, "Call site needs exactly one type");
+  assert(hotspot_method.not_null() ^ foreign_call.not_null(), "Call site needs exactly one type");
 
   NativeInstruction* inst = nativeInstruction_at(_instructions->start() + pc_offset);
-  jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method);
+  jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method, CHECK);
 
-  if (debug_info != NULL) {
-    _debug_recorder->add_safepoint(next_pc_offset, create_oop_map(debug_info));
-    record_scope(next_pc_offset, debug_info);
+  if (debug_info.not_null()) {
+    OopMap *map = create_oop_map(debug_info, CHECK);
+    _debug_recorder->add_safepoint(next_pc_offset, map);
+    record_scope(next_pc_offset, debug_info, CHECK);
   }
 
-  if (foreign_call != NULL) {
+  if (foreign_call.not_null()) {
     jlong foreign_call_destination = HotSpotForeignCallTarget::address(foreign_call);
-    CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination);
+    CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination, CHECK);
   } else { // method != NULL
-    assert(hotspot_method != NULL, "unexpected JavaMethod");
-    assert(debug_info != NULL, "debug info expected");
+    if (debug_info.is_null()) {
+      JVMCI_ERROR("debug info expected at call at %i", pc_offset);
+    }
 
     TRACE_jvmci_3("method call");
-    CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset);
+    CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset, CHECK);
     if (_next_call_type == INVOKESTATIC || _next_call_type == INVOKESPECIAL) {
       // Need a static call stub for transitions from compiled to interpreted.
       CompiledStaticCall::emit_to_interp_stub(buffer, _instructions->start() + pc_offset);
@@ -953,38 +1049,45 @@
 
   _next_call_type = INVOKE_INVALID;
 
-  if (debug_info != NULL) {
+  if (debug_info.not_null()) {
     _debug_recorder->end_safepoint(next_pc_offset);
   }
 }
 
-void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop reference = CompilationResult_DataPatch::reference(site);
-  if (reference->is_a(CompilationResult_ConstantReference::klass())) {
+void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle reference = CompilationResult_DataPatch::reference(site);
+  if (reference.is_null()) {
+    THROW(vmSymbols::java_lang_NullPointerException());
+  } else if (reference->is_a(CompilationResult_ConstantReference::klass())) {
     Handle constant = CompilationResult_ConstantReference::constant(reference);
-    if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
-      pd_patch_OopConstant(pc_offset, constant);
+    if (constant.is_null()) {
+      THROW(vmSymbols::java_lang_NullPointerException());
+    } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
+      pd_patch_OopConstant(pc_offset, constant, CHECK);
     } else if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) {
-      pd_patch_MetaspaceConstant(pc_offset, constant);
-    } else if (constant->is_a(HotSpotSentinelConstant::klass())) {
-      fatal("sentinel constant unsupported");
+      pd_patch_MetaspaceConstant(pc_offset, constant, CHECK);
     } else {
-      fatal("unknown constant type in data patch");
+      JVMCI_ERROR("unknown constant type in data patch: %s", constant->klass()->signature_name());
     }
   } else if (reference->is_a(CompilationResult_DataSectionReference::klass())) {
     int data_offset = CompilationResult_DataSectionReference::offset(reference);
-    assert(0 <= data_offset && data_offset < _constants_size, "data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size);
-    pd_patch_DataSectionReference(pc_offset, data_offset);
+    if (0 <= data_offset && data_offset < _constants_size) {
+      pd_patch_DataSectionReference(pc_offset, data_offset);
+    } else {
+      JVMCI_ERROR("data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size);
+    }
   } else {
-    fatal("unknown data patch type");
+    JVMCI_ERROR("unknown data patch type: %s", reference->klass()->signature_name());
   }
 }
 
-void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop id_obj = CompilationResult_Mark::id(site);
+void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle id_obj = CompilationResult_Mark::id(site);
 
-  if (id_obj != NULL) {
-    assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected");
+  if (id_obj.not_null()) {
+    if (!java_lang_boxing_object::is_instance(id_obj(), T_INT)) {
+      JVMCI_ERROR("expected Integer id, got %s", id_obj->klass()->signature_name());
+    }
     jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT));
 
     address pc = _instructions->start() + pc_offset;
@@ -1017,7 +1120,7 @@
       case POLL_FAR:
       case POLL_RETURN_NEAR:
       case POLL_RETURN_FAR:
-        pd_relocate_poll(pc, id);
+        pd_relocate_poll(pc, id, CHECK);
         break;
       case CARD_TABLE_SHIFT:
       case CARD_TABLE_ADDRESS:
@@ -1027,7 +1130,7 @@
       case CRC_TABLE_ADDRESS:
         break;
       default:
-        ShouldNotReachHere();
+        JVMCI_ERROR("invalid mark id: %d", id);
         break;
     }
   }

--- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -154,13 +154,13 @@
   static ConstantIntValue*    _int_2_scope_value;
   static LocationValue*       _illegal_value;
 
-  jint pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method);
-  void pd_patch_OopConstant(int pc_offset, Handle& constant);
-  void pd_patch_MetaspaceConstant(int pc_offset, Handle& constant);
+  jint pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS);
+  void pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS);
+  void pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS);
   void pd_patch_DataSectionReference(int pc_offset, int data_offset);
-  void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination);
-  void pd_relocate_JavaMethod(oop method, jint pc_offset);
-  void pd_relocate_poll(address pc, jint mark);
+  void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS);
+  void pd_relocate_JavaMethod(Handle method, jint pc_offset, TRAPS);
+  void pd_relocate_poll(address pc, jint mark, TRAPS);
 
   objArrayOop sites() { return (objArrayOop) JNIHandles::resolve(_sites_handle); }
   arrayOop code() { return (arrayOop) JNIHandles::resolve(_code_handle); }
@@ -177,33 +177,33 @@
 
   CodeInstaller() : _arena(mtCompiler) {}
 
-  JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata);
-  JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log);
+  JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS);
+  JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS);
 
   static address runtime_call_target_address(oop runtime_call);
-  static VMReg get_hotspot_reg(jint jvmciRegisterNumber);
+  static VMReg get_hotspot_reg(jint jvmciRegisterNumber, TRAPS);
   static bool is_general_purpose_reg(VMReg hotspotRegister);
 
   const OopMapSet* oopMapSet() const { return _debug_recorder->_oopmaps; }
 
 protected:
-  Location::Type get_oop_type(oop value);
-  ScopeValue* get_scope_value(oop value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second);
-  MonitorValue* get_monitor_value(oop value, GrowableArray<ScopeValue*>* objects);
+  Location::Type get_oop_type(Handle value);
+  ScopeValue* get_scope_value(Handle value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second, TRAPS);
+  MonitorValue* get_monitor_value(Handle value, GrowableArray<ScopeValue*>* objects, TRAPS);
 
-  Metadata* record_metadata_reference(Handle& constant);
+  Metadata* record_metadata_reference(Handle constant, TRAPS);
 #ifdef _LP64
-  narrowKlass record_narrow_metadata_reference(Handle& constant);
+  narrowKlass record_narrow_metadata_reference(Handle constant, TRAPS);
 #endif
 
   // extract the fields of the CompilationResult
-  void initialize_fields(oop target, oop target_method);
-  void initialize_dependencies(oop target_method, OopRecorder* oop_recorder);
+  void initialize_fields(oop target, oop target_method, TRAPS);
+  void initialize_dependencies(oop target_method, OopRecorder* oop_recorder, TRAPS);
 
-  int estimate_stubs_size();
+  int estimate_stubs_size(TRAPS);
 
   // perform data and call relocation on the CodeBuffer
-  JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer);
+  JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer, TRAPS);
 
   void assumption_NoFinalizableSubclass(Handle assumption);
   void assumption_ConcreteSubtype(Handle assumption);
@@ -211,19 +211,19 @@
   void assumption_ConcreteMethod(Handle assumption);
   void assumption_CallSiteTargetValue(Handle assumption);
 
-  void site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Call(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Mark(CodeBuffer& buffer, jint pc_offset, oop site);
+  void site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
 
-  OopMap* create_oop_map(oop debug_info);
+  OopMap* create_oop_map(Handle debug_info, TRAPS);
 
-  void record_scope(jint pc_offset, oop debug_info);
-  void record_scope(jint pc_offset, oop code_pos, GrowableArray<ScopeValue*>* objects);
-  void record_object_value(ObjectValue* sv, oop value, GrowableArray<ScopeValue*>* objects);
+  void record_scope(jint pc_offset, Handle debug_info, TRAPS);
+  void record_scope(jint pc_offset, Handle code_pos, GrowableArray<ScopeValue*>* objects, TRAPS);
+  void record_object_value(ObjectValue* sv, Handle value, GrowableArray<ScopeValue*>* objects, TRAPS);
 
-  GrowableArray<ScopeValue*>* record_virtual_objects(oop debug_info);
+  GrowableArray<ScopeValue*>* record_virtual_objects(Handle debug_info, TRAPS);
 
   void process_exception_handlers();
   int estimateStubSpace(int static_call_stubs);

--- a/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -112,7 +112,7 @@
   _bootstrapping = false;
 }
 
-void JVMCICompiler::compile_method(methodHandle method, int entry_bci, JVMCIEnv* env) {
+void JVMCICompiler::compile_method(const methodHandle& method, int entry_bci, JVMCIEnv* env) {
   JVMCI_EXCEPTION_CONTEXT
 
   bool is_osr = entry_bci != InvocationEntryBci;

--- a/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -71,7 +71,7 @@
   // Compilation entry point for methods
   virtual void compile_method(ciEnv* env, ciMethod* target, int entry_bci, DirectiveSet* directive);
 
-  void compile_method(methodHandle target, int entry_bci, JVMCIEnv* env);
+  void compile_method(const methodHandle& target, int entry_bci, JVMCIEnv* env);
 
   virtual bool is_trivial(Method* method);

--- a/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -670,7 +670,7 @@
 
   TraceTime install_time("installCode", JVMCICompiler::codeInstallTimer());
   CodeInstaller installer;
-  JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle);
+  JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle, CHECK_0);
 
   if (PrintCodeCacheOnCompilation) {
     stringStream s;
@@ -690,6 +690,7 @@
       assert(installed_code_handle->is_a(InstalledCode::klass()), "wrong type");
       CompilerToVM::invalidate_installed_code(installed_code_handle, CHECK_0);
       InstalledCode::set_address(installed_code_handle, (jlong) cb);
+      InstalledCode::set_version(installed_code_handle, InstalledCode::version(installed_code_handle) + 1);
       if (cb->is_nmethod()) {
         InstalledCode::set_entryPoint(installed_code_handle, (jlong) cb->as_nmethod_or_null()->verified_entry_point());
       } else {
@@ -726,7 +727,7 @@
   CodeBlob *cb = NULL;
   CodeInstaller installer;
 
-  JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer);
+  JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata, CHECK_0); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer);
   if (result != JVMCIEnv::ok) {
     return result;
   }

--- a/hotspot/src/share/vm/jvmci/jvmciEnv.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciEnv.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -161,7 +161,7 @@
 }
 
 // ------------------------------------------------------------------
-KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle& accessing_klass,
+KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle accessing_klass,
                                   Symbol* klass_name,
                                   bool require_local) {
   ResourceMark rm;
@@ -177,7 +177,7 @@
 KlassHandle JVMCIEnv::get_klass_by_index_impl(const constantPoolHandle& cpool,
                                         int index,
                                         bool& is_accessible,
-                                        KlassHandle& accessor) {
+                                        KlassHandle accessor) {
   JVMCI_EXCEPTION_CONTEXT;
   KlassHandle klass (THREAD, ConstantPool::klass_at_if_loaded(cpool, index));
   Symbol* klass_name = NULL;
@@ -218,7 +218,7 @@
 KlassHandle JVMCIEnv::get_klass_by_index(const constantPoolHandle& cpool,
                                    int index,
                                    bool& is_accessible,
-                                   KlassHandle& accessor) {
+                                   KlassHandle accessor) {
   ResourceMark rm;
   KlassHandle result = get_klass_by_index_impl(cpool, index, is_accessible, accessor);
   return result;
@@ -229,7 +229,7 @@
 //
 // Implementation note: the results of field lookups are cached
 // in the accessor klass.
-void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle& klass, fieldDescriptor& field_desc,
+void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle klass, fieldDescriptor& field_desc,
                                         int index) {
   JVMCI_EXCEPTION_CONTEXT;
 
@@ -270,7 +270,7 @@
 
 // ------------------------------------------------------------------
 // Get a field by index from a klass's constant pool.
-void JVMCIEnv::get_field_by_index(instanceKlassHandle& accessor, fieldDescriptor& fd, int index) {
+void JVMCIEnv::get_field_by_index(instanceKlassHandle accessor, fieldDescriptor& fd, int index) {
   ResourceMark rm;
   return get_field_by_index_impl(accessor, fd, index);
 }
@@ -278,8 +278,8 @@
 // ------------------------------------------------------------------
 // Perform an appropriate method lookup based on accessor, holder,
 // name, signature, and bytecode.
-methodHandle JVMCIEnv::lookup_method(instanceKlassHandle& h_accessor,
-                               instanceKlassHandle& h_holder,
+methodHandle JVMCIEnv::lookup_method(instanceKlassHandle h_accessor,
+                               instanceKlassHandle h_holder,
                                Symbol*       name,
                                Symbol*       sig,
                                Bytecodes::Code bc) {
@@ -314,7 +314,7 @@
 // ------------------------------------------------------------------
 methodHandle JVMCIEnv::get_method_by_index_impl(const constantPoolHandle& cpool,
                                           int index, Bytecodes::Code bc,
-                                          instanceKlassHandle& accessor) {
+                                          instanceKlassHandle accessor) {
   if (bc == Bytecodes::_invokedynamic) {
     ConstantPoolCacheEntry* cpce = cpool->invokedynamic_cp_cache_entry_at(index);
     bool is_resolved = !cpce->is_f1_null();
@@ -379,7 +379,7 @@
 }
 
 // ------------------------------------------------------------------
-instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle& method_holder) {
+instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle method_holder) {
   // For the case of <array>.clone(), the method holder can be an ArrayKlass*
   // instead of an InstanceKlass*.  For that case simply pretend that the
   // declared holder is Object.clone since that's where the call will bottom out.
@@ -397,7 +397,7 @@
 // ------------------------------------------------------------------
 methodHandle JVMCIEnv::get_method_by_index(const constantPoolHandle& cpool,
                                      int index, Bytecodes::Code bc,
-                                     instanceKlassHandle& accessor) {
+                                     instanceKlassHandle accessor) {
   ResourceMark rm;
   return get_method_by_index_impl(cpool, index, bc, accessor);
 }
@@ -452,7 +452,7 @@
 
 // ------------------------------------------------------------------
 JVMCIEnv::CodeInstallResult JVMCIEnv::register_method(
-                                methodHandle& method,
+                                const methodHandle& method,
                                 nmethod*& nm,
                                 int entry_bci,
                                 CodeOffsets* offsets,

--- a/hotspot/src/share/vm/jvmci/jvmciEnv.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciEnv.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -78,7 +78,7 @@
   // The CI treats a klass as loaded if it is consistently defined in
   // another loader, even if it hasn't yet been loaded in all loaders
   // that could potentially see it via delegation.
-  static KlassHandle get_klass_by_name(KlassHandle& accessing_klass,
+  static KlassHandle get_klass_by_name(KlassHandle accessing_klass,
                              Symbol* klass_name,
                              bool require_local);
 
@@ -86,12 +86,12 @@
   static KlassHandle   get_klass_by_index(const constantPoolHandle& cpool,
                                 int klass_index,
                                 bool& is_accessible,
-                                KlassHandle& loading_klass);
-  static void   get_field_by_index(instanceKlassHandle& loading_klass, fieldDescriptor& fd,
+                                KlassHandle loading_klass);
+  static void   get_field_by_index(instanceKlassHandle loading_klass, fieldDescriptor& fd,
                                 int field_index);
   static methodHandle  get_method_by_index(const constantPoolHandle& cpool,
                                  int method_index, Bytecodes::Code bc,
-                                 instanceKlassHandle& loading_klass);
+                                 instanceKlassHandle loading_klass);
 
   JVMCIEnv(CompileTask* task, int system_dictionary_modification_counter);
 
@@ -112,17 +112,17 @@
   static KlassHandle   get_klass_by_index_impl(const constantPoolHandle& cpool,
                                      int klass_index,
                                      bool& is_accessible,
-                                     KlassHandle& loading_klass);
-  static void   get_field_by_index_impl(instanceKlassHandle& loading_klass, fieldDescriptor& fd,
+                                     KlassHandle loading_klass);
+  static void   get_field_by_index_impl(instanceKlassHandle loading_klass, fieldDescriptor& fd,
                                      int field_index);
   static methodHandle  get_method_by_index_impl(const constantPoolHandle& cpool,
                                       int method_index, Bytecodes::Code bc,
-                                      instanceKlassHandle& loading_klass);
+                                      instanceKlassHandle loading_klass);
 
   // Helper methods
   static bool       check_klass_accessibility(KlassHandle accessing_klass, KlassHandle resolved_klass);
-  static methodHandle  lookup_method(instanceKlassHandle&  accessor,
-                           instanceKlassHandle&  holder,
+  static methodHandle  lookup_method(instanceKlassHandle  accessor,
+                           instanceKlassHandle  holder,
                            Symbol*         name,
                            Symbol*         sig,
                            Bytecodes::Code bc);
@@ -142,7 +142,7 @@
 
   // Register the result of a compilation.
   static JVMCIEnv::CodeInstallResult register_method(
-                       methodHandle&             target,
+                       const methodHandle&       target,
                        nmethod*&                 nm,
                        int                       entry_bci,
                        CodeOffsets*              offsets,
@@ -166,7 +166,7 @@
   // InstanceKlass*.  This is needed since the holder of a method in
   // the bytecodes could be an array type.  Basically this converts
   // array types into java/lang/Object and other types stay as they are.
-  static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle& klass);
+  static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle klass);
 };
 
 #endif // SHARE_VM_JVMCI_JVMCIENV_HPP

--- a/hotspot/src/share/vm/jvmci/jvmciJavaClasses.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciJavaClasses.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -30,7 +30,7 @@
 // This function is similar to javaClasses.cpp, it computes the field offset of a (static or instance) field.
 // It looks up the name and signature symbols without creating new ones, all the symbols of these classes need to be already loaded.
 
-void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field) {
+void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field, TRAPS) {
   InstanceKlass* ik = InstanceKlass::cast(klass);
   Symbol* name_symbol = SymbolTable::probe(name, (int)strlen(name));
   Symbol* signature_symbol = SymbolTable::probe(signature, (int)strlen(signature));
@@ -49,6 +49,11 @@
   guarantee(fd.is_static() == static_field, "static/instance mismatch");
   dest_offset = fd.offset();
   assert(dest_offset != 0, "must be valid offset");
+  if (static_field) {
+    // Must ensure classes for static fields are initialized as the
+    // accessor itself does not include a class initialization check.
+    ik->initialize(CHECK);
+  }
 }
 
 // This piece of macro magic creates the contents of the jvmci_compute_offsets method that initializes the field indices of all the access classes.
@@ -57,7 +62,7 @@
 
 #define END_CLASS }
 
-#define FIELD(klass, name, signature, static_field) compute_offset(klass::_##name##_offset, k, #name, signature, static_field);
+#define FIELD(klass, name, signature, static_field) compute_offset(klass::_##name##_offset, k, #name, signature, static_field, CHECK);
 #define CHAR_FIELD(klass, name) FIELD(klass, name, "C", false)
 #define INT_FIELD(klass, name) FIELD(klass, name, "I", false)
 #define BOOLEAN_FIELD(klass, name) FIELD(klass, name, "Z", false)
@@ -69,7 +74,7 @@
 #define STATIC_BOOLEAN_FIELD(klass, name) FIELD(klass, name, "Z", true)
 
 
-void JVMCIJavaClasses::compute_offsets() {
+void JVMCIJavaClasses::compute_offsets(TRAPS) {
   COMPILER_CLASSES_DO(START_CLASS, END_CLASS, CHAR_FIELD, INT_FIELD, BOOLEAN_FIELD, LONG_FIELD, FLOAT_FIELD, OOP_FIELD, OOP_FIELD, OOP_FIELD, STATIC_OOP_FIELD, STATIC_OOP_FIELD, STATIC_INT_FIELD, STATIC_BOOLEAN_FIELD)
 }

--- a/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -29,7 +29,7 @@
 
 class JVMCIJavaClasses : AllStatic {
  public:
-  static void compute_offsets();
+  static void compute_offsets(TRAPS);
 };
 
 /* This macro defines the structure of the CompilationResult - classes.
@@ -306,7 +306,7 @@
         assert(obj->is_a(SystemDictionary::name##_klass()), "wrong class, " #name " expected, found %s", obj->klass()->external_name());                       \
         assert(offset != 0, "must be valid offset");                                                                                                           \
     }                                                                                                                                                          \
-    static void compute_offsets();                                                                                                                             \
+    static void compute_offsets(TRAPS);                                                                                                                             \
   public:                                                                                                                                                      \
     static InstanceKlass* klass() { return SystemDictionary::name##_klass(); }
 
@@ -315,10 +315,10 @@
 #define FIELD(name, type, accessor, cast)                                                                                                                         \
     static int _##name##_offset;                                                                                                                                  \
     static type name(oop obj)                   { check(obj, #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                               \
-    static type name(Handle& obj)                { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                            \
+    static type name(Handle obj)                { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                             \
     static type name(jobject obj)               { check(JNIHandles::resolve(obj), #name, _##name##_offset); return cast JNIHandles::resolve(obj)->accessor(_##name##_offset); }     \
     static void set_##name(oop obj, type x)     { check(obj, #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                                  \
-    static void set_##name(Handle& obj, type x)  { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                               \
+    static void set_##name(Handle obj, type x)  { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                                \
     static void set_##name(jobject obj, type x) { check(JNIHandles::resolve(obj), #name, _##name##_offset); JNIHandles::resolve(obj)->accessor##_put(_##name##_offset, x); }
 
 #define EMPTY_CAST
@@ -392,6 +392,6 @@
 #undef STATIC_BOOLEAN_FIELD
 #undef EMPTY_CAST
 
-void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field);
+void compute_offset(int &dest_offset, Klass* klass, const char* name, const char* signature, bool static_field, TRAPS);
 
 #endif // SHARE_VM_JVMCI_JVMCIJAVACLASSES_HPP

--- a/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -59,7 +59,11 @@
 static const char* OPTION_PREFIX = "jvmci.option.";
 static const size_t OPTION_PREFIX_LEN = strlen(OPTION_PREFIX);
 
-BasicType JVMCIRuntime::kindToBasicType(jchar ch) {
+BasicType JVMCIRuntime::kindToBasicType(Handle kind, TRAPS) {
+  if (kind.is_null()) {
+    THROW_(vmSymbols::java_lang_NullPointerException(), T_ILLEGAL);
+  }
+  jchar ch = JavaKind::typeChar(kind);
   switch(ch) {
     case 'z': return T_BOOLEAN;
     case 'b': return T_BYTE;
@@ -72,10 +76,8 @@
     case 'a': return T_OBJECT;
     case '-': return T_ILLEGAL;
     default:
-      fatal("unexpected Kind: %c", ch);
-      break;
+      JVMCI_ERROR_(T_ILLEGAL, "unexpected Kind: %c", ch);
   }
-  return T_ILLEGAL;
 }
 
 // Simple helper to see if the caller of a runtime stub which
@@ -718,7 +720,7 @@
   if (JVMCIRuntime::_well_known_classes_initialized == false) {
     SystemDictionary::WKID scan = SystemDictionary::FIRST_JVMCI_WKID;
     SystemDictionary::initialize_wk_klasses_through(SystemDictionary::LAST_JVMCI_WKID, scan, CHECK);
-    JVMCIJavaClasses::compute_offsets();
+    JVMCIJavaClasses::compute_offsets(CHECK);
     JVMCIRuntime::_well_known_classes_initialized = true;
   }
 }

--- a/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -29,6 +29,17 @@
 #include "runtime/arguments.hpp"
 #include "runtime/deoptimization.hpp"
 
+#define JVMCI_ERROR(...)       \
+  { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return; }
+
+#define JVMCI_ERROR_(ret, ...) \
+  { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return ret; }
+
+#define JVMCI_ERROR_0(...)    JVMCI_ERROR_(0, __VA_ARGS__)
+#define JVMCI_ERROR_NULL(...) JVMCI_ERROR_(NULL, __VA_ARGS__)
+#define JVMCI_ERROR_OK(...)   JVMCI_ERROR_(JVMCIEnv::ok, __VA_ARGS__)
+#define CHECK_OK              CHECK_(JVMCIEnv::ok)
+
 class ParseClosure : public StackObj {
   int _lineNo;
   char* _filename;
@@ -171,7 +182,7 @@
   } \
   (void)(0
 
-  static BasicType kindToBasicType(jchar ch);
+  static BasicType kindToBasicType(Handle kind, TRAPS);
 
   // The following routines are all called from compiled JVMCI code

--- a/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -86,6 +86,7 @@
   template(jdk_vm_ci_code_VirtualObject,                          "jdk/vm/ci/code/VirtualObject")                          \
   template(jdk_vm_ci_code_RegisterSaveLayout,                     "jdk/vm/ci/code/RegisterSaveLayout")                     \
   template(jdk_vm_ci_code_InvalidInstalledCodeException,          "jdk/vm/ci/code/InvalidInstalledCodeException")          \
+  template(jdk_vm_ci_common_JVMCIError,                           "jdk/vm/ci/common/JVMCIError")                           \
   template(compileMethod_name,                                    "compileMethod")                                         \
   template(compileMethod_signature,                               "(Ljdk/vm/ci/hotspot/HotSpotResolvedJavaMethod;IJI)V")   \
   template(fromMetaspace_name,                                    "fromMetaspace")                                         \

--- a/hotspot/src/share/vm/memory/heap.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/memory/heap.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -47,7 +47,10 @@
   _freelist_segments            = 0;
   _freelist_length              = 0;
   _max_allocated_capacity       = 0;
-  _was_full                     = false;
+  _blob_count                   = 0;
+  _nmethod_count                = 0;
+  _adapter_count                = 0;
+  _full_count                   = 0;
 }
 
 
@@ -185,6 +188,7 @@
     assert(!block->free(), "must be marked free");
     DEBUG_ONLY(memset((void*)block->allocated_space(), badCodeHeapNewVal, instance_size));
     _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity());
+    _blob_count++;
     return block->allocated_space();
   }
 
@@ -198,6 +202,7 @@
     _next_segment += number_of_segments;
     DEBUG_ONLY(memset((void *)b->allocated_space(), badCodeHeapNewVal, instance_size));
     _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity());
+    _blob_count++;
     return b->allocated_space();
   } else {
     return NULL;

--- a/hotspot/src/share/vm/memory/heap.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/memory/heap.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -100,7 +100,11 @@
 
   const char*  _name;                            // Name of the CodeHeap
   const int    _code_blob_type;                  // CodeBlobType it contains
-  bool         _was_full;                        // True if the code heap was full
+  int          _blob_count;                      // Number of CodeBlobs
+  int          _nmethod_count;                   // Number of nmethods
+  int          _adapter_count;                   // Number of adapters
+  int          _full_count;                      // Number of times the code heap was full
+
 
   enum { free_sentinel = 0xFF };
 
@@ -179,8 +183,13 @@
 
   // Debugging / Profiling
   const char* name() const                       { return _name; }
-  bool was_full()                                { return _was_full; }
-  void report_full()                             { _was_full = true; }
+  int         blob_count()                       { return _blob_count; }
+  int         nmethod_count()                    { return _nmethod_count; }
+  void    set_nmethod_count(int count)           {        _nmethod_count = count; }
+  int         adapter_count()                    { return _adapter_count; }
+  void    set_adapter_count(int count)           {        _adapter_count = count; }
+  int         full_count()                       { return _full_count; }
+  void        report_full()                      {        _full_count++; }
 
 private:
   size_t heap_unallocated_capacity() const;

--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -27,6 +27,7 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/verifier.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "code/dependencyContext.hpp"
 #include "compiler/compileBroker.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "gc/shared/specialized_oop_closures.hpp"
@@ -203,7 +204,6 @@
 
   int iksize = InstanceKlass::size(vtable_len, itable_len, nonstatic_oop_map_size,
                                    access_flags.is_interface(), is_anonymous);
-
   set_vtable_length(vtable_len);
   set_itable_length(itable_len);
   set_static_field_size(static_field_size);
@@ -232,7 +232,7 @@
   set_static_oop_field_count(0);
   set_nonstatic_field_size(0);
   set_is_marked_dependent(false);
-  set_has_unloaded_dependent(false);
+  _dep_context = DependencyContext::EMPTY;
   set_init_state(InstanceKlass::allocated);
   set_init_thread(NULL);
   set_reference_type(rt);
@@ -246,7 +246,6 @@
   set_annotations(NULL);
   set_jvmti_cached_class_field_map(NULL);
   set_initial_method_idnum(0);
-  _dependencies = NULL;
   set_jvmti_cached_class_field_map(NULL);
   set_cached_class_file(NULL);
   set_initial_method_idnum(0);
@@ -1854,200 +1853,30 @@
   return id;
 }
 
-int nmethodBucket::decrement() {
-  return Atomic::add(-1, (volatile int *)&_count);
-}
-
-//
-// Walk the list of dependent nmethods searching for nmethods which
-// are dependent on the changes that were passed in and mark them for
-// deoptimization.  Returns the number of nmethods found.
-//
-int nmethodBucket::mark_dependent_nmethods(nmethodBucket* deps, DepChange& changes) {
-  assert_locked_or_safepoint(CodeCache_lock);
-  int found = 0;
-  for (nmethodBucket* b = deps; b != NULL; b = b->next()) {
-    nmethod* nm = b->get_nmethod();
-    // since dependencies aren't removed until an nmethod becomes a zombie,
-    // the dependency list may contain nmethods which aren't alive.
-    if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
-      if (TraceDependencies) {
-        ResourceMark rm;
-        tty->print_cr("Marked for deoptimization");
-        changes.print();
-        nm->print();
-        nm->print_dependencies();
-      }
-      nm->mark_for_deoptimization();
-      found++;
-    }
-  }
-  return found;
-}
-
-//
-// Add an nmethodBucket to the list of dependencies for this nmethod.
-// It's possible that an nmethod has multiple dependencies on this klass
-// so a count is kept for each bucket to guarantee that creation and
-// deletion of dependencies is consistent. Returns new head of the list.
-//
-nmethodBucket* nmethodBucket::add_dependent_nmethod(nmethodBucket* deps, nmethod* nm) {
-  assert_locked_or_safepoint(CodeCache_lock);
-  for (nmethodBucket* b = deps; b != NULL; b = b->next()) {
-    if (nm == b->get_nmethod()) {
-      b->increment();
-      return deps;
-    }
-  }
-  return new nmethodBucket(nm, deps);
+inline DependencyContext InstanceKlass::dependencies() {
+  DependencyContext dep_context(&_dep_context);
+  return dep_context;
 }
 
-//
-// Decrement count of the nmethod in the dependency list and remove
-// the bucket completely when the count goes to 0.  This method must
-// find a corresponding bucket otherwise there's a bug in the
-// recording of dependencies. Returns true if the bucket was deleted,
-// or marked ready for reclaimation.
-bool nmethodBucket::remove_dependent_nmethod(nmethodBucket** deps, nmethod* nm, bool delete_immediately) {
-  assert_locked_or_safepoint(CodeCache_lock);
-
-  nmethodBucket* first = *deps;
-  nmethodBucket* last = NULL;
-
-  for (nmethodBucket* b = first; b != NULL; b = b->next()) {
-    if (nm == b->get_nmethod()) {
-      int val = b->decrement();
-      guarantee(val >= 0, "Underflow: %d", val);
-      if (val == 0) {
-        if (delete_immediately) {
-          if (last == NULL) {
-            *deps = b->next();
-          } else {
-            last->set_next(b->next());
-          }
-          delete b;
-        }
-      }
-      return true;
-    }
-    last = b;
-  }
-
-#ifdef ASSERT
-  tty->print_raw_cr("### can't find dependent nmethod");
-  nm->print();
-#endif // ASSERT
-  ShouldNotReachHere();
-  return false;
-}
-
-// Convenience overload, for callers that don't want to delete the nmethodBucket entry.
-bool nmethodBucket::remove_dependent_nmethod(nmethodBucket* deps, nmethod* nm) {
-  nmethodBucket** deps_addr = &deps;
-  return remove_dependent_nmethod(deps_addr, nm, false /* Don't delete */);
-}
-
-//
-// Reclaim all unused buckets. Returns new head of the list.
-//
-nmethodBucket* nmethodBucket::clean_dependent_nmethods(nmethodBucket* deps) {
-  nmethodBucket* first = deps;
-  nmethodBucket* last = NULL;
-  nmethodBucket* b = first;
-
-  while (b != NULL) {
-    assert(b->count() >= 0, "bucket count: %d", b->count());
-    nmethodBucket* next = b->next();
-    if (b->count() == 0) {
-      if (last == NULL) {
-        first = next;
-      } else {
-        last->set_next(next);
-      }
-      delete b;
-      // last stays the same.
-    } else {
-      last = b;
-    }
-    b = next;
-  }
-  return first;
-}
-
-#ifndef PRODUCT
-void nmethodBucket::print_dependent_nmethods(nmethodBucket* deps, bool verbose) {
-  int idx = 0;
-  for (nmethodBucket* b = deps; b != NULL; b = b->next()) {
-    nmethod* nm = b->get_nmethod();
-    tty->print("[%d] count=%d { ", idx++, b->count());
-    if (!verbose) {
-      nm->print_on(tty, "nmethod");
-      tty->print_cr(" } ");
-    } else {
-      nm->print();
-      nm->print_dependencies();
-      tty->print_cr("--- } ");
-    }
-  }
-}
-
-bool nmethodBucket::is_dependent_nmethod(nmethodBucket* deps, nmethod* nm) {
-  for (nmethodBucket* b = deps; b != NULL; b = b->next()) {
-    if (nm == b->get_nmethod()) {
-#ifdef ASSERT
-      int count = b->count();
-      assert(count >= 0, "count shouldn't be negative: %d", count);
-#endif
-      return true;
-    }
-  }
-  return false;
-}
-#endif //PRODUCT
-
 int InstanceKlass::mark_dependent_nmethods(DepChange& changes) {
-  assert_locked_or_safepoint(CodeCache_lock);
-  return nmethodBucket::mark_dependent_nmethods(_dependencies, changes);
-}
-
-void InstanceKlass::clean_dependent_nmethods() {
-  assert_locked_or_safepoint(CodeCache_lock);
-
-  if (has_unloaded_dependent()) {
-    _dependencies = nmethodBucket::clean_dependent_nmethods(_dependencies);
-    set_has_unloaded_dependent(false);
-  }
-#ifdef ASSERT
-  else {
-    // Verification
-    for (nmethodBucket* b = _dependencies; b != NULL; b = b->next()) {
-      assert(b->count() >= 0, "bucket count: %d", b->count());
-      assert(b->count() != 0, "empty buckets need to be cleaned");
-    }
-  }
-#endif
+  return dependencies().mark_dependent_nmethods(changes);
 }
 
 void InstanceKlass::add_dependent_nmethod(nmethod* nm) {
-  assert_locked_or_safepoint(CodeCache_lock);
-  _dependencies = nmethodBucket::add_dependent_nmethod(_dependencies, nm);
+  dependencies().add_dependent_nmethod(nm);
 }
 
 void InstanceKlass::remove_dependent_nmethod(nmethod* nm, bool delete_immediately) {
-  assert_locked_or_safepoint(CodeCache_lock);
-
-  if (nmethodBucket::remove_dependent_nmethod(&_dependencies, nm, delete_immediately)) {
-    set_has_unloaded_dependent(true);
-  }
+  dependencies().remove_dependent_nmethod(nm, delete_immediately);
 }
 
 #ifndef PRODUCT
 void InstanceKlass::print_dependent_nmethods(bool verbose) {
-  nmethodBucket::print_dependent_nmethods(_dependencies, verbose);
+  dependencies().print_dependent_nmethods(verbose);
 }
 
 bool InstanceKlass::is_dependent_nmethod(nmethod* nm) {
-  return nmethodBucket::is_dependent_nmethod(_dependencies, nm);
+  return dependencies().is_dependent_nmethod(nm);
 }
 #endif //PRODUCT
 
@@ -2055,7 +1884,9 @@
   clean_implementors_list(is_alive);
   clean_method_data(is_alive);
 
-  clean_dependent_nmethods();
+  // Since GC iterates InstanceKlasses sequentially, it is safe to remove stale entries here.
+  DependencyContext dep_context(&_dep_context);
+  dep_context.expunge_stale_entries();
 }
 
 void InstanceKlass::clean_implementors_list(BoolObjectClosure* is_alive) {
@@ -2102,6 +1933,8 @@
 
   constants()->remove_unshareable_info();
 
+  assert(_dep_context == DependencyContext::EMPTY, "dependency context is not shareable");
+
   for (int i = 0; i < methods()->length(); i++) {
     Method* m = methods()->at(i);
     m->remove_unshareable_info();
@@ -2231,12 +2064,10 @@
   }
 
   // release dependencies
-  nmethodBucket* b = _dependencies;
-  _dependencies = NULL;
-  while (b != NULL) {
-    nmethodBucket* next = b->next();
-    delete b;
-    b = next;
+  {
+    DependencyContext ctx(&_dep_context);
+    int marked = ctx.remove_all_dependents();
+    assert(marked == 0, "all dependencies should be already invalidated");
   }
 
   // Deallocate breakpoint records
@@ -3558,199 +3389,3 @@
 unsigned char * InstanceKlass::get_cached_class_file_bytes() {
   return VM_RedefineClasses::get_cached_class_file_bytes(_cached_class_file);
 }
-
-
-/////////////// Unit tests ///////////////
-
-#ifndef PRODUCT
-
-class TestNmethodBucketContext {
- public:
-  nmethod* _nmethodLast;
-  nmethod* _nmethodMiddle;
-  nmethod* _nmethodFirst;
-
-  nmethodBucket* _bucketLast;
-  nmethodBucket* _bucketMiddle;
-  nmethodBucket* _bucketFirst;
-
-  nmethodBucket* _bucketList;
-
-  TestNmethodBucketContext() {
-    CodeCache_lock->lock_without_safepoint_check();
-
-    _nmethodLast   = reinterpret_cast<nmethod*>(0x8 * 0);
-    _nmethodMiddle = reinterpret_cast<nmethod*>(0x8 * 1);
-    _nmethodFirst  = reinterpret_cast<nmethod*>(0x8 * 2);
-
-    _bucketLast   = new nmethodBucket(_nmethodLast,   NULL);
-    _bucketMiddle = new nmethodBucket(_nmethodMiddle, _bucketLast);
-    _bucketFirst  = new nmethodBucket(_nmethodFirst,   _bucketMiddle);
-
-    _bucketList = _bucketFirst;
-  }
-
-  ~TestNmethodBucketContext() {
-    delete _bucketLast;
-    delete _bucketMiddle;
-    delete _bucketFirst;
-
-    CodeCache_lock->unlock();
-  }
-};
-
-class TestNmethodBucket {
- public:
-  static void testRemoveDependentNmethodFirstDeleteImmediately() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodFirst, true /* delete */);
-
-    assert(c._bucketList == c._bucketMiddle, "check");
-    assert(c._bucketList->next() == c._bucketLast, "check");
-    assert(c._bucketList->next()->next() == NULL, "check");
-
-    // Cleanup before context is deleted.
-    c._bucketFirst = NULL;
-  }
-
-  static void testRemoveDependentNmethodMiddleDeleteImmediately() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodMiddle, true /* delete */);
-
-    assert(c._bucketList == c._bucketFirst, "check");
-    assert(c._bucketList->next() == c._bucketLast, "check");
-    assert(c._bucketList->next()->next() == NULL, "check");
-
-    // Cleanup before context is deleted.
-    c._bucketMiddle = NULL;
-  }
-
-  static void testRemoveDependentNmethodLastDeleteImmediately() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodLast, true /* delete */);
-
-    assert(c._bucketList == c._bucketFirst, "check");
-    assert(c._bucketList->next() == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next() == NULL, "check");
-
-    // Cleanup before context is deleted.
-    c._bucketLast = NULL;
-  }
-
-  static void testRemoveDependentNmethodFirstDeleteDeferred() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodFirst, false /* delete */);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 0, "check");
-    assert(c._bucketMiddle->count() == 1, "check");
-    assert(c._bucketLast->count()   == 1, "check");
-  }
-
-  static void testRemoveDependentNmethodMiddleDeleteDeferred() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodMiddle, false /* delete */);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 1, "check");
-    assert(c._bucketMiddle->count() == 0, "check");
-    assert(c._bucketLast->count()   == 1, "check");
-  }
-
-  static void testRemoveDependentNmethodLastDeleteDeferred() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(&c._bucketList, c._nmethodLast, false /* delete */);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 1, "check");
-    assert(c._bucketMiddle->count() == 1, "check");
-    assert(c._bucketLast->count()   == 0, "check");
-  }
-
-  static void testRemoveDependentNmethodConvenienceFirst() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodFirst);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 0, "check");
-    assert(c._bucketMiddle->count() == 1, "check");
-    assert(c._bucketLast->count()   == 1, "check");
-  }
-
-  static void testRemoveDependentNmethodConvenienceMiddle() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodMiddle);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 1, "check");
-    assert(c._bucketMiddle->count() == 0, "check");
-    assert(c._bucketLast->count()   == 1, "check");
-  }
-
-  static void testRemoveDependentNmethodConvenienceLast() {
-    TestNmethodBucketContext c;
-
-    nmethodBucket::remove_dependent_nmethod(c._bucketList, c._nmethodLast);
-
-    assert(c._bucketList                         == c._bucketFirst,  "check");
-    assert(c._bucketList->next()                 == c._bucketMiddle, "check");
-    assert(c._bucketList->next()->next()         == c._bucketLast,   "check");
-    assert(c._bucketList->next()->next()->next() == NULL,            "check");
-
-    assert(c._bucketFirst->count()  == 1, "check");
-    assert(c._bucketMiddle->count() == 1, "check");
-    assert(c._bucketLast->count()   == 0, "check");
-  }
-
-  static void testRemoveDependentNmethod() {
-    testRemoveDependentNmethodFirstDeleteImmediately();
-    testRemoveDependentNmethodMiddleDeleteImmediately();
-    testRemoveDependentNmethodLastDeleteImmediately();
-
-    testRemoveDependentNmethodFirstDeleteDeferred();
-    testRemoveDependentNmethodMiddleDeleteDeferred();
-    testRemoveDependentNmethodLastDeleteDeferred();
-
-    testRemoveDependentNmethodConvenienceFirst();
-    testRemoveDependentNmethodConvenienceMiddle();
-    testRemoveDependentNmethodConvenienceLast();
-  }
-
-  static void test() {
-    testRemoveDependentNmethod();
-  }
-};
-
-void TestNmethodBucket_test() {
-  TestNmethodBucket::test();
-}
-
-#endif

--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -53,15 +53,15 @@
 
 
 // forward declaration for class -- see below for definition
-class SuperTypeClosure;
-class JNIid;
+class BreakpointInfo;
+class DepChange;
+class DependencyContext;
+class fieldDescriptor;
 class jniIdMapBase;
-class BreakpointInfo;
-class fieldDescriptor;
-class DepChange;
-class nmethodBucket;
+class JNIid;
 class JvmtiCachedClassFieldMap;
 class MemberNameTable;
+class SuperTypeClosure;
 
 // This is used in iterators below.
 class FieldClosure: public StackObj {
@@ -198,7 +198,6 @@
   // _is_marked_dependent can be set concurrently, thus cannot be part of the
   // _misc_flags.
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
-  bool            _has_unloaded_dependent;
 
   // The low two bits of _misc_flags contains the kind field.
   // This can be used to quickly discriminate among the four kinds of
@@ -235,7 +234,7 @@
   MemberNameTable* _member_names;        // Member names
   JNIid*          _jni_ids;              // First JNI identifier for static fields in this class
   jmethodID*      _methods_jmethod_ids;  // jmethodIDs corresponding to method_idnum, or NULL if none
-  nmethodBucket*  _dependencies;         // list of dependent nmethods
+  intptr_t        _dep_context;          // packed DependencyContext structure
   nmethod*        _osr_nmethods_head;    // Head of list of on-stack replacement nmethods for this class
   BreakpointInfo* _breakpoints;          // bpt lists, managed by Method*
   // Linked instanceKlasses of previous versions
@@ -468,9 +467,6 @@
   bool is_marked_dependent() const         { return _is_marked_dependent; }
   void set_is_marked_dependent(bool value) { _is_marked_dependent = value; }
 
-  bool has_unloaded_dependent() const         { return _has_unloaded_dependent; }
-  void set_has_unloaded_dependent(bool value) { _has_unloaded_dependent = value; }
-
   // initialization (virtuals from Klass)
   bool should_be_initialized() const;  // means that initialize should be called
   void initialize(TRAPS);
@@ -835,7 +831,8 @@
   JNIid* jni_id_for(int offset);
 
   // maintenance of deoptimization dependencies
-  int mark_dependent_nmethods(DepChange& changes);
+  inline DependencyContext dependencies();
+  int  mark_dependent_nmethods(DepChange& changes);
   void add_dependent_nmethod(nmethod* nm);
   void remove_dependent_nmethod(nmethod* nm, bool delete_immediately);
 
@@ -1027,7 +1024,6 @@
   void clean_weak_instanceklass_links(BoolObjectClosure* is_alive);
   void clean_implementors_list(BoolObjectClosure* is_alive);
   void clean_method_data(BoolObjectClosure* is_alive);
-  void clean_dependent_nmethods();
 
   // Explicit metaspace deallocation of fields
   // For RedefineClasses and class file parsing errors, we need to deallocate
@@ -1320,48 +1316,6 @@
   void verify(Klass* holder);
 };
 
-
-//
-// nmethodBucket is used to record dependent nmethods for
-// deoptimization.  nmethod dependencies are actually <klass, method>
-// pairs but we really only care about the klass part for purposes of
-// finding nmethods which might need to be deoptimized.  Instead of
-// recording the method, a count of how many times a particular nmethod
-// was recorded is kept.  This ensures that any recording errors are
-// noticed since an nmethod should be removed as many times are it's
-// added.
-//
-class nmethodBucket: public CHeapObj<mtClass> {
-  friend class VMStructs;
- private:
-  nmethod*       _nmethod;
-  int            _count;
-  nmethodBucket* _next;
-
- public:
-  nmethodBucket(nmethod* nmethod, nmethodBucket* next) {
-    _nmethod = nmethod;
-    _next = next;
-    _count = 1;
-  }
-  int count()                             { return _count; }
-  int increment()                         { _count += 1; return _count; }
-  int decrement();
-  nmethodBucket* next()                   { return _next; }
-  void set_next(nmethodBucket* b)         { _next = b; }
-  nmethod* get_nmethod()                  { return _nmethod; }
-
-  static int mark_dependent_nmethods(nmethodBucket* deps, DepChange& changes);
-  static nmethodBucket* add_dependent_nmethod(nmethodBucket* deps, nmethod* nm);
-  static bool remove_dependent_nmethod(nmethodBucket** deps, nmethod* nm, bool delete_immediately);
-  static bool remove_dependent_nmethod(nmethodBucket* deps, nmethod* nm);
-  static nmethodBucket* clean_dependent_nmethods(nmethodBucket* deps);
-#ifndef PRODUCT
-  static void print_dependent_nmethods(nmethodBucket* deps, bool verbose);
-  static bool is_dependent_nmethod(nmethodBucket* deps, nmethod* nm);
-#endif //PRODUCT
-};
-
 // An iterator that's used to access the inner classes indices in the
 // InstanceKlass::_inner_classes array.
 class InnerClassesIterator : public StackObj {

--- a/hotspot/src/share/vm/oops/method.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/oops/method.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -579,12 +579,45 @@
 }
 
 bool Method::is_accessor() const {
+  return is_getter() || is_setter();
+}
+
+bool Method::is_getter() const {
   if (code_size() != 5) return false;
   if (size_of_parameters() != 1) return false;
-  if (java_code_at(0) != Bytecodes::_aload_0 ) return false;
+  if (java_code_at(0) != Bytecodes::_aload_0)  return false;
   if (java_code_at(1) != Bytecodes::_getfield) return false;
-  if (java_code_at(4) != Bytecodes::_areturn &&
-      java_code_at(4) != Bytecodes::_ireturn ) return false;
+  switch (java_code_at(4)) {
+    case Bytecodes::_ireturn:
+    case Bytecodes::_lreturn:
+    case Bytecodes::_freturn:
+    case Bytecodes::_dreturn:
+    case Bytecodes::_areturn:
+      break;
+    default:
+      return false;
+  }
+  return true;
+}
+
+bool Method::is_setter() const {
+  if (code_size() != 6) return false;
+  if (java_code_at(0) != Bytecodes::_aload_0) return false;
+  switch (java_code_at(1)) {
+    case Bytecodes::_iload_1:
+    case Bytecodes::_aload_1:
+    case Bytecodes::_fload_1:
+      if (size_of_parameters() != 2) return false;
+      break;
+    case Bytecodes::_dload_1:
+    case Bytecodes::_lload_1:
+      if (size_of_parameters() != 3) return false;
+      break;
+    default:
+      return false;
+  }
+  if (java_code_at(2) != Bytecodes::_putfield) return false;
+  if (java_code_at(5) != Bytecodes::_return)   return false;
   return true;
 }

--- a/hotspot/src/share/vm/oops/method.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/oops/method.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -595,6 +595,12 @@
   // returns true if the method is an accessor function (setter/getter).
   bool is_accessor() const;
 
+  // returns true if the method is a getter
+  bool is_getter() const;
+
+  // returns true if the method is a setter
+  bool is_setter() const;
+
   // returns true if the method does nothing but return a constant of primitive type
   bool is_constant_getter() const;

--- a/hotspot/src/share/vm/opto/buildOopMap.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -542,10 +542,11 @@
     if (i == cfg->number_of_blocks()) {
       break;                    // Got 'em all
     }
-#ifndef PRODUCT
-    if( PrintOpto && Verbose )
+
+    if (PrintOpto && Verbose) {
       tty->print_cr("retripping live calc");
-#endif
+    }
+
     // Force the issue (expensively): recheck everybody
     for (i = 1; i < cfg->number_of_blocks(); i++) {
       worklist->push(cfg->get_block(i));

--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -186,9 +186,9 @@
           "Maximum number of unrolls for main loop")                        \
           range(0, max_jint)                                                \
                                                                             \
-  product(bool,  SuperWordLoopUnrollAnalysis, false,                        \
-          "Map number of unrolls for main loop via "                        \
-          "Superword Level Parallelism analysis")                           \
+  product_pd(bool,  SuperWordLoopUnrollAnalysis,                            \
+           "Map number of unrolls for main loop via "                       \
+           "Superword Level Parallelism analysis")                          \
                                                                             \
   notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false,                 \
           "Trace what Superword Level Parallelism analysis applies")        \

--- a/hotspot/src/share/vm/opto/c2compiler.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -451,6 +451,7 @@
   case vmIntrinsics::_updateByteBufferAdler32:
   case vmIntrinsics::_profileBoolean:
   case vmIntrinsics::_isCompileConstant:
+  case vmIntrinsics::_Objects_checkIndex:
     break;
   default:
     return false;

--- a/hotspot/src/share/vm/opto/callnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -778,7 +778,7 @@
     }
     if (is_CallJava() && as_CallJava()->method() != NULL) {
       ciMethod* meth = as_CallJava()->method();
-      if (meth->is_accessor()) {
+      if (meth->is_getter()) {
         return false;
       }
       // May modify (by reflection) if an boxing object is passed

--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -984,7 +984,7 @@
 #ifdef ASSERT
   // The following logic has been moved into TypeOopPtr::filter.
   const Type* jt = t->join_speculative(_type);
-  if( jt->empty() ) {           // Emptied out???
+  if (jt->empty()) {           // Emptied out???
 
     // Check for evil case of 't' being a class and '_type' expecting an
     // interface.  This can happen because the bytecodes do not contain
@@ -995,14 +995,21 @@
     // be 'I' or 'j/l/O'.  Thus we'll pick 'j/l/O'.  If this then flows
     // into a Phi which "knows" it's an Interface type we'll have to
     // uplift the type.
-    if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() )
-      { assert(ft == _type, ""); } // Uplift to interface
-    else if( !t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface() )
-      { assert(ft == _type, ""); } // Uplift to interface
-    // Otherwise it's something stupid like non-overlapping int ranges
-    // found on dying counted loops.
-    else
-      { assert(ft == Type::TOP, ""); } // Canonical empty value
+    if (!t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface()) {
+      assert(ft == _type, ""); // Uplift to interface
+    } else if (!t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface()) {
+      assert(ft == _type, ""); // Uplift to interface
+    } else {
+      // We also have to handle 'evil cases' of interface- vs. class-arrays
+      Type::get_arrays_base_elements(jt, _type, NULL, &ttip);
+      if (!t->empty() && ttip != NULL && ttip->is_loaded() && ttip->klass()->is_interface()) {
+          assert(ft == _type, "");   // Uplift to array of interface
+      } else {
+        // Otherwise it's something stupid like non-overlapping int ranges
+        // found on dying counted loops.
+        assert(ft == Type::TOP, ""); // Canonical empty value
+      }
+    }
   }
 
   else {

--- a/hotspot/src/share/vm/opto/cfgnode.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -270,7 +270,6 @@
   virtual uint size_of() const { return sizeof(*this); }
 
 private:
-  ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r);
   ProjNode* range_check_trap_proj() {
     int flip_test = 0;
     Node* l = NULL;
@@ -283,7 +282,7 @@
   bool is_ctrl_folds(Node* ctrl, PhaseIterGVN* igvn);
   bool has_shared_region(ProjNode* proj, ProjNode*& success, ProjNode*& fail);
   bool has_only_uncommon_traps(ProjNode* proj, ProjNode*& success, ProjNode*& fail, PhaseIterGVN* igvn);
-  static void merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
+  Node* merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
   static void improve_address_types(Node* l, Node* r, ProjNode* fail, PhaseIterGVN* igvn);
   bool is_cmp_with_loadrange(ProjNode* proj);
   bool is_null_check(ProjNode* proj, PhaseIterGVN* igvn);
@@ -292,6 +291,12 @@
   ProjNode* uncommon_trap_proj(CallStaticJavaNode*& call) const;
   bool fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
 
+protected:
+  ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r);
+  Node* Ideal_common(PhaseGVN *phase, bool can_reshape);
+  Node* dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
+  Node* search_identical(int dist);
+
 public:
 
   // Degrees of branch prediction probability by order of magnitude:
@@ -375,8 +380,6 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual int required_outcnt() const { return 2; }
   virtual const RegMask &out_RegMask() const;
-  void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
-  int is_range_check(Node* &range, Node* &index, jint &offset);
   Node* fold_compares(PhaseIterGVN* phase);
   static Node* up_one_dom(Node* curr, bool linear_only = false);
 
@@ -391,6 +394,20 @@
 #endif
 };
 
+class RangeCheckNode : public IfNode {
+private:
+  int is_range_check(Node* &range, Node* &index, jint &offset);
+
+public:
+  RangeCheckNode(Node* control, Node *b, float p, float fcnt)
+    : IfNode(control, b, p, fcnt) {
+    init_class_id(Class_RangeCheck);
+  }
+
+  virtual int Opcode() const;
+  virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
 class IfProjNode : public CProjNode {
 public:
   IfProjNode(IfNode *ifnode, uint idx) : CProjNode(ifnode,idx) {}

--- a/hotspot/src/share/vm/opto/classes.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/classes.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -138,6 +138,7 @@
 macro(Halt)
 macro(HasNegatives)
 macro(If)
+macro(RangeCheck)
 macro(IfFalse)
 macro(IfTrue)
 macro(Initialize)

--- a/hotspot/src/share/vm/opto/compile.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/compile.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -707,7 +707,7 @@
     _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
   }
 #endif
-  set_print_inlining(directive->PrintInliningOption NOT_PRODUCT( || PrintOptoInlining));
+  set_print_inlining(directive->PrintInliningOption || PrintOptoInlining);
   set_print_intrinsics(directive->PrintIntrinsicsOption);
   set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
 
@@ -3181,6 +3181,13 @@
       n->set_req(MemBarNode::Precedent, top());
     }
     break;
+  case Op_RangeCheck: {
+    RangeCheckNode* rc = n->as_RangeCheck();
+    Node* iff = new IfNode(rc->in(0), rc->in(1), rc->_prob, rc->_fcnt);
+    n->subsume_by(iff, this);
+    frc._tests.push(iff);
+    break;
+  }
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -3189,8 +3196,9 @@
   }
 
   // Collect CFG split points
-  if (n->is_MultiBranch())
+  if (n->is_MultiBranch() && !n->is_RangeCheck()) {
     frc._tests.push(n);
+  }
 }
 
 //------------------------------final_graph_reshaping_walk---------------------

--- a/hotspot/src/share/vm/opto/doCall.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -45,7 +45,7 @@
   if (TraceTypeProfile || C->print_inlining()) {
     outputStream* out = tty;
     if (!C->print_inlining()) {
-      if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
+      if (!PrintOpto && !PrintCompilation) {
         method->print_short_name();
         tty->cr();
       }
@@ -426,12 +426,10 @@
   // uncommon-trap when callee is unloaded, uninitialized or will not link
   // bailout when too many arguments for register representation
   if (!will_link || can_not_compile_call_site(orig_callee, klass)) {
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci());
       orig_callee->print_name(); tty->cr();
     }
-#endif
     return;
   }
   assert(holder_klass->is_loaded(), "");
@@ -634,12 +632,10 @@
     // If the return type of the method is not loaded, assert that the
     // value we got is a null.  Otherwise, we need to recompile.
     if (!rtype->is_loaded()) {
-#ifndef PRODUCT
       if (PrintOpto && (Verbose || WizardMode)) {
         method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci());
         cg->method()->print_name(); tty->cr();
       }
-#endif
       if (C->log() != NULL) {
         C->log()->elem("assert_null reason='return' klass='%d'",
                        C->log()->identify(rtype));
@@ -851,11 +847,9 @@
 
     if (remaining == 1) {
       push_ex_oop(ex_node);        // Push exception oop for handler
-#ifndef PRODUCT
       if (PrintOpto && WizardMode) {
         tty->print_cr("  Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci);
       }
-#endif
       merge_exception(handler_bci); // jump to handler
       return;                   // No more handling to be done here!
     }
@@ -882,13 +876,11 @@
       assert(klass->has_subklass() || tinst->klass_is_exact(), "lost exactness");
       Node* ex_oop = _gvn.transform(new CheckCastPPNode(control(), ex_node, tinst));
       push_ex_oop(ex_oop);      // Push exception oop for handler
-#ifndef PRODUCT
       if (PrintOpto && WizardMode) {
         tty->print("  Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci);
         klass->print_name();
         tty->cr();
       }
-#endif
       merge_exception(handler_bci);
     }
     set_control(not_subtype_ctrl);
@@ -1067,13 +1059,11 @@
     // such method can be changed when its class is redefined.
     ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver);
     if (exact_method != NULL) {
-#ifndef PRODUCT
       if (PrintOpto) {
         tty->print("  Calling method via exact type @%d --- ", bci);
         exact_method->print_name();
         tty->cr();
       }
-#endif
       return exact_method;
     }
   }

--- a/hotspot/src/share/vm/opto/graphKit.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1457,18 +1457,22 @@
 // factory methods in "int adr_idx"
 Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                           int adr_idx,
-                          MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency, bool require_atomic_access) {
+                          MemNode::MemOrd mo,
+                          LoadNode::ControlDependency control_dependency,
+                          bool require_atomic_access,
+                          bool unaligned,
+                          bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
   const TypePtr* adr_type = NULL; // debug-mode-only argument
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency);
+    ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched);
   } else if (require_atomic_access && bt == T_DOUBLE) {
-    ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency);
+    ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched);
   }
   ld = _gvn.transform(ld);
   if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
@@ -1481,7 +1485,9 @@
 Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
                                 int adr_idx,
                                 MemNode::MemOrd mo,
-                                bool require_atomic_access) {
+                                bool require_atomic_access,
+                                bool unaligned,
+                                bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
@@ -1494,6 +1500,12 @@
   } else {
     st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
+  if (unaligned) {
+    st->as_Store()->set_unaligned_access();
+  }
+  if (mismatched) {
+    st->as_Store()->set_mismatched_access();
+  }
   st = _gvn.transform(st);
   set_memory(st, adr_idx);
   // Back-to-back stores can only remove intermediate store with DU info
@@ -1587,7 +1599,8 @@
                           const TypeOopPtr* val_type,
                           BasicType bt,
                           bool use_precise,
-                          MemNode::MemOrd mo) {
+                          MemNode::MemOrd mo,
+                          bool mismatched) {
   // Transformation of a value which could be NULL pointer (CastPP #NULL)
   // could be delayed during Parse (for example, in adjust_map_after_if()).
   // Execute transformation here to avoid barrier generation in such case.
@@ -1607,7 +1620,7 @@
               NULL /* pre_val */,
               bt);
 
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
@@ -1619,7 +1632,8 @@
                              const TypePtr* adr_type,
                              Node* val,
                              BasicType bt,
-                             MemNode::MemOrd mo) {
+                             MemNode::MemOrd mo,
+                             bool mismatched) {
   Compile::AliasType* at = C->alias_type(adr_type);
   const TypeOopPtr* val_type = NULL;
   if (adr_type->isa_instptr()) {
@@ -1638,7 +1652,7 @@
   if (val_type == NULL) {
     val_type = TypeInstPtr::BOTTOM;
   }
-  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo);
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched);
 }
 
 
@@ -4373,7 +4387,8 @@
   set_memory(mem, TypeAryPtr::BYTES);
   Node* ch = load_array_element(control(), src, i_byte, TypeAryPtr::BYTES);
   Node* st = store_to_memory(control(), array_element_address(dst, i_char, T_BYTE),
-                             AndI(ch, intcon(0xff)), T_CHAR, TypeAryPtr::BYTES, MemNode::unordered);
+                             AndI(ch, intcon(0xff)), T_CHAR, TypeAryPtr::BYTES, MemNode::unordered,
+                             false, false, true /* mismatched */);
 
   IfNode* iff = create_and_map_if(head, Bool(CmpI(i_byte, count), BoolTest::lt), PROB_FAIR, COUNT_UNKNOWN);
   head->init_req(2, IfTrue(iff));

--- a/hotspot/src/share/vm/opto/graphKit.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -513,23 +513,28 @@
   // of volatile fields.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false) {
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false) {
     // This version computes alias_index from bottom_type
     return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
-                     mo, control_dependency, require_atomic_access);
+                     mo, control_dependency, require_atomic_access,
+                     unaligned, mismatched);
   }
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false) {
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other make_load factory");
     return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
-                     mo, control_dependency, require_atomic_access);
+                     mo, control_dependency, require_atomic_access,
+                     unaligned, mismatched);
   }
   // This is the base version which is given an alias index.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false);
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false);
 
   // Create & transform a StoreNode and store the effect into the
   // parser's memory state.
@@ -542,19 +547,24 @@
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         const TypePtr* adr_type,
                         MemNode::MemOrd mo,
-                        bool require_atomic_access = false) {
+                        bool require_atomic_access = false,
+                        bool unaligned = false,
+                        bool mismatched = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other store_to_memory factory");
     return store_to_memory(ctl, adr, val, bt,
                            C->get_alias_index(adr_type),
-                           mo, require_atomic_access);
+                           mo, require_atomic_access,
+                           unaligned, mismatched);
   }
   // This is the base version which is given alias index
   // Return the new StoreXNode
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         int adr_idx,
                         MemNode::MemOrd,
-                        bool require_atomic_access = false);
+                        bool require_atomic_access = false,
+                        bool unaligned = false,
+                        bool mismatched = false);
 
 
   // All in one pre-barrier, store, post_barrier
@@ -577,7 +587,8 @@
                   const TypeOopPtr* val_type,
                   BasicType bt,
                   bool use_precise,
-                  MemNode::MemOrd mo);
+                  MemNode::MemOrd mo,
+                  bool mismatched = false);
 
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
@@ -608,7 +619,8 @@
                              const TypePtr* adr_type,
                              Node* val,
                              BasicType bt,
-                             MemNode::MemOrd mo);
+                             MemNode::MemOrd mo,
+                             bool mismatched = false);
 
   // For the few case where the barriers need special help
   void pre_barrier(bool do_load, Node* ctl,

--- a/hotspot/src/share/vm/opto/idealKit.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/idealKit.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -368,7 +368,8 @@
 
 Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
                       int adr_idx,
-                      MemNode::MemOrd mo, bool require_atomic_access) {
+                      MemNode::MemOrd mo, bool require_atomic_access,
+                      bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
@@ -379,6 +380,9 @@
   } else {
     st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
+  if (mismatched) {
+    st->as_Store()->set_mismatched_access();
+  }
   st = transform(st);
   set_memory(st, adr_idx);

--- a/hotspot/src/share/vm/opto/idealKit.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/idealKit.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -229,7 +229,8 @@
               BasicType bt,
               int adr_idx,
               MemNode::MemOrd mo,
-              bool require_atomic_access = false);
+              bool require_atomic_access = false,
+              bool mismatched = false);
 
   // Store a card mark ordered after store_oop
   Node* storeCM(Node* ctl,

--- a/hotspot/src/share/vm/opto/ifnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/ifnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "ci/ciTypeFlow.hpp"
 #include "memory/allocation.inline.hpp"
 #include "opto/addnode.hpp"
 #include "opto/castnode.hpp"
@@ -305,12 +306,16 @@
   Node *b_c = phase->transform(new BoolNode(cmp_c,b->_test._test));
   Node *b_x = phase->transform(new BoolNode(cmp_x,b->_test._test));
   // Make the IfNode
-  IfNode *iff_c = new IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
+  IfNode* iff_c = iff->clone()->as_If();
+  iff_c->set_req(0, region_c);
+  iff_c->set_req(1, b_c);
   igvn->set_type_bottom(iff_c);
   igvn->_worklist.push(iff_c);
   hook->init_req(2, iff_c);
 
-  IfNode *iff_x = new IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
+  IfNode* iff_x = iff->clone()->as_If();
+  iff_x->set_req(0, region_x);
+  iff_x->set_req(1, b_x);
   igvn->set_type_bottom(iff_x);
   igvn->_worklist.push(iff_x);
   hook->init_req(3, iff_x);
@@ -480,7 +485,7 @@
     return NULL;
   }
   if (l->is_top())  return NULL;   // Top input means dead test
-  if (r->Opcode() != Op_LoadRange)  return NULL;
+  if (r->Opcode() != Op_LoadRange && !is_RangeCheck())  return NULL;
 
   // We have recognized one of these forms:
   //  Flip 1:  If (Bool[<] CmpU(l, LoadRange)) ...
@@ -495,7 +500,7 @@
 // Return 0 if not a range check.  Return 1 if a range check and set index and
 // offset.  Return 2 if we had to negate the test.  Index is NULL if the check
 // is versus a constant.
-int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+int RangeCheckNode::is_range_check(Node* &range, Node* &index, jint &offset) {
   int flip_test = 0;
   Node* l = NULL;
   Node* r = NULL;
@@ -520,9 +525,9 @@
     return 0;
   } else if (l->Opcode() == Op_AddI) {
     if ((off = l->in(1)->find_int_con(0)) != 0) {
-      ind = l->in(2);
+      ind = l->in(2)->uncast();
     } else if ((off = l->in(2)->find_int_con(0)) != 0) {
-      ind = l->in(1);
+      ind = l->in(1)->uncast();
     }
   } else if ((off = l->find_int_con(-1)) >= 0) {
     // constant offset with no variable index
@@ -723,7 +728,7 @@
   return ctrl != NULL &&
     ctrl->is_Proj() &&
     ctrl->in(0) != NULL &&
-    ctrl->in(0)->is_If() &&
+    ctrl->in(0)->Opcode() == Op_If &&
     ctrl->in(0)->outcnt() == 2 &&
     ctrl->in(0)->as_If()->cmpi_folds(igvn) &&
     // Must compare same value
@@ -771,6 +776,11 @@
   CallStaticJavaNode* dom_unc = otherproj->is_uncommon_trap_proj(Deoptimization::Reason_none);
 
   if (otherproj->outcnt() == 1 && dom_unc != NULL) {
+    // We need to re-execute the folded Ifs after deoptimization from the merged traps
+    if (!dom_unc->jvms()->should_reexecute()) {
+      return false;
+    }
+
     CallStaticJavaNode* unc = NULL;
     ProjNode* unc_proj = uncommon_trap_proj(unc);
     if (unc_proj != NULL && unc_proj->outcnt() == 1) {
@@ -784,12 +794,41 @@
       } else if (dom_unc->in(0) != otherproj || unc->in(0) != unc_proj) {
         return false;
       }
+
+      // Different methods and methods containing jsrs are not supported.
+      ciMethod* method = unc->jvms()->method();
+      ciMethod* dom_method = dom_unc->jvms()->method();
+      if (method != dom_method || method->has_jsrs()) {
+        return false;
+      }
+      // Check that both traps are in the same activation of the method (instead
+      // of two activations being inlined through different call sites) by verifying
+      // that the call stacks are equal for both JVMStates.
+      JVMState* dom_caller = dom_unc->jvms()->caller();
+      JVMState* caller = unc->jvms()->caller();
+      if ((dom_caller == NULL) != (caller == NULL)) {
+        // The current method must either be inlined into both dom_caller and
+        // caller or must not be inlined at all (top method). Bail out otherwise.
+        return false;
+      } else if (dom_caller != NULL && !dom_caller->same_calls_as(caller)) {
+        return false;
+      }
+      // Check that the bci of the dominating uncommon trap dominates the bci
+      // of the dominated uncommon trap. Otherwise we may not re-execute
+      // the dominated check after deoptimization from the merged uncommon trap.
+      ciTypeFlow* flow = dom_method->get_flow_analysis();
+      int bci = unc->jvms()->bci();
+      int dom_bci = dom_unc->jvms()->bci();
+      if (!flow->is_dominated_by(bci, dom_bci)) {
+        return false;
+      }
+
       // See merge_uncommon_traps: the reason of the uncommon trap
       // will be changed and the state of the dominating If will be
       // used. Checked that we didn't apply this transformation in a
       // previous compilation and it didn't cause too many traps
-      if (!igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_unstable_fused_if) &&
-          !igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_range_check)) {
+      if (!igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_unstable_fused_if) &&
+          !igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_range_check)) {
         success = unc_proj;
         fail = unc_proj->other_if_proj();
         return true;
@@ -941,8 +980,8 @@
         if (failtype->_lo > failtype->_hi) {
           // previous if determines the result of this if so
           // replace Bool with constant
-          igvn->hash_delete(this);
-          set_req(1, igvn->intcon(success->_con));
+          igvn->_worklist.push(in(1));
+          igvn->replace_input_of(this, 1, igvn->intcon(success->_con));
           return true;
         }
       }
@@ -961,7 +1000,8 @@
     Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
 
     igvn->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
-    set_req(1, newbool);
+    igvn->_worklist.push(in(1));
+    igvn->replace_input_of(this, 1, newbool);
 
     return true;
   }
@@ -971,7 +1011,10 @@
 // Merge the branches that trap for this If and the dominating If into
 // a single region that branches to the uncommon trap for the
 // dominating If
-void IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
+Node* IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
+  Node* res = this;
+  assert(success->in(0) == this, "bad projection");
+
   ProjNode* otherproj = proj->other_if_proj();
 
   CallStaticJavaNode* unc = success->is_uncommon_trap_proj(Deoptimization::Reason_none);
@@ -1007,6 +1050,8 @@
     trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_range_check, action);
 
     improve_address_types(l, r, fail, igvn);
+
+    res = igvn->transform(new RangeCheckNode(in(0), in(1), _prob, _fcnt));
   } else if (unc != dom_unc) {
     // If we trap we won't know what CmpI would have caused the trap
     // so use a special trap reason to mark this pair of CmpI nodes as
@@ -1016,6 +1061,7 @@
     trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_unstable_fused_if, action);
   }
   igvn->replace_input_of(dom_unc, TypeFunc::Parms, igvn->intcon(trap_request));
+  return res;
 }
 
 // If we are turning 2 CmpI nodes into a CmpU that follows the pattern
@@ -1209,8 +1255,7 @@
       if (has_only_uncommon_traps(dom_cmp, success, fail, igvn) &&
           // Next call modifies graph so must be last
           fold_compares_helper(dom_cmp, success, fail, igvn)) {
-        merge_uncommon_traps(dom_cmp, success, fail, igvn);
-        return this;
+        return merge_uncommon_traps(dom_cmp, success, fail, igvn);
       }
       return NULL;
     } else if (ctrl->in(0) != NULL &&
@@ -1229,8 +1274,7 @@
           // Next call modifies graph so must be last
           fold_compares_helper(dom_cmp, success, fail, igvn)) {
         reroute_side_effect_free_unc(other_cmp, dom_cmp, igvn);
-        merge_uncommon_traps(dom_cmp, success, fail, igvn);
-        return this;
+        return merge_uncommon_traps(dom_cmp, success, fail, igvn);
       }
     }
   }
@@ -1311,14 +1355,10 @@
   jint off;
 };
 
-//------------------------------Ideal------------------------------------------
-// Return a node which is more "ideal" than the current node.  Strip out
-// control copies
-Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+Node* IfNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape))  return this;
   // No Def-Use info?
   if (!can_reshape)  return NULL;
-  PhaseIterGVN *igvn = phase->is_IterGVN();
 
   // Don't bother trying to transform a dead if
   if (in(0)->is_top())  return NULL;
@@ -1334,24 +1374,291 @@
   if (idt_if != NULL)  return idt_if;
 
   // Try to split the IF
+  PhaseIterGVN *igvn = phase->is_IterGVN();
   Node *s = split_if(this, igvn);
   if (s != NULL)  return s;
 
+  return NodeSentinel;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.  Strip out
+// control copies
+Node* IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* res = Ideal_common(phase, can_reshape);
+  if (res != NodeSentinel) {
+    return res;
+  }
+
   // Check for people making a useless boolean: things like
   // if( (x < y ? true : false) ) { ... }
   // Replace with if( x < y ) { ... }
   Node *bol2 = remove_useless_bool(this, phase);
   if( bol2 ) return bol2;
 
+  if (in(0) == NULL) return NULL;     // Dead loop?
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  Node* result = fold_compares(igvn);
+  if (result != NULL) {
+    return result;
+  }
+
+  // Scan for an equivalent test
+  Node *cmp;
+  int dist = 0;               // Cutoff limit for search
+  int op = Opcode();
+  if( op == Op_If &&
+      (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
+    if( cmp->in(2) != NULL && // make sure cmp is not already dead
+        cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+      dist = 64;              // Limit for null-pointer scans
+    } else {
+      dist = 4;               // Do not bother for random pointer tests
+    }
+  } else {
+    dist = 4;                 // Limit for random junky scans
+  }
+
+  Node* prev_dom = search_identical(dist);
+
+  if (prev_dom == NULL) {
+    return NULL;
+  }
+
+  // Replace dominated IfNode
+  return dominated_by(prev_dom, igvn);
+}
+
+//------------------------------dominated_by-----------------------------------
+Node* IfNode::dominated_by(Node* prev_dom, PhaseIterGVN *igvn) {
+#ifndef PRODUCT
+  if (TraceIterativeGVN) {
+    tty->print("   Removing IfNode: "); this->dump();
+  }
+  if (VerifyOpto && !igvn->allow_progress()) {
+    // Found an equivalent dominating test,
+    // we can not guarantee reaching a fix-point for these during iterativeGVN
+    // since intervening nodes may not change.
+    return NULL;
+  }
+#endif
+
+  igvn->hash_delete(this);      // Remove self to prevent spurious V-N
+  Node *idom = in(0);
+  // Need opcode to decide which way 'this' test goes
+  int prev_op = prev_dom->Opcode();
+  Node *top = igvn->C->top(); // Shortcut to top
+
+  // Loop predicates may have depending checks which should not
+  // be skipped. For example, range check predicate has two checks
+  // for lower and upper bounds.
+  ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
+  if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL)
+   prev_dom = idom;
+
+  // Now walk the current IfNode's projections.
+  // Loop ends when 'this' has no more uses.
+  for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+    Node *ifp = last_out(i);     // Get IfTrue/IfFalse
+    igvn->add_users_to_worklist(ifp);
+    // Check which projection it is and set target.
+    // Data-target is either the dominating projection of the same type
+    // or TOP if the dominating projection is of opposite type.
+    // Data-target will be used as the new control edge for the non-CFG
+    // nodes like Casts and Loads.
+    Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
+    // Control-target is just the If's immediate dominator or TOP.
+    Node *ctrl_target = (ifp->Opcode() == prev_op) ?     idom : top;
+
+    // For each child of an IfTrue/IfFalse projection, reroute.
+    // Loop ends when projection has no more uses.
+    for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
+      Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
+      if( !s->depends_only_on_test() ) {
+        // Find the control input matching this def-use edge.
+        // For Regions it may not be in slot 0.
+        uint l;
+        for( l = 0; s->in(l) != ifp; l++ ) { }
+        igvn->replace_input_of(s, l, ctrl_target);
+      } else {                      // Else, for control producers,
+        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
+      }
+    } // End for each child of a projection
+
+    igvn->remove_dead_node(ifp);
+  } // End for each IfTrue/IfFalse child of If
+
+  // Kill the IfNode
+  igvn->remove_dead_node(this);
+
+  // Must return either the original node (now dead) or a new node
+  // (Do not return a top here, since that would break the uniqueness of top.)
+  return new ConINode(TypeInt::ZERO);
+}
+
+Node* IfNode::search_identical(int dist) {
   // Setup to scan up the CFG looking for a dominating test
-  Node *dom = in(0);
-  Node *prev_dom = this;
+  Node* dom = in(0);
+  Node* prev_dom = this;
+  int op = Opcode();
+  // Search up the dominator tree for an If with an identical test
+  while( dom->Opcode() != op    ||  // Not same opcode?
+         dom->in(1)    != in(1) ||  // Not same input 1?
+         (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
+         prev_dom->in(0) != dom ) { // One path of test does not dominate?
+    if( dist < 0 ) return NULL;
+
+    dist--;
+    prev_dom = dom;
+    dom = up_one_dom( dom );
+    if( !dom ) return NULL;
+  }
+
+  // Check that we did not follow a loop back to ourselves
+  if( this == dom )
+    return NULL;
+
+  if( dist > 2 )              // Add to count of NULL checks elided
+    explicit_null_checks_elided++;
+
+  return prev_dom;
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfProjNode::Identity(PhaseTransform *phase) {
+  // Can only optimize if cannot go the other way
+  const TypeTuple *t = phase->type(in(0))->is_tuple();
+  if (t == TypeTuple::IFNEITHER ||
+      // kill dead branch first otherwise the IfNode's control will
+      // have 2 control uses (the IfNode that doesn't go away because
+      // it still has uses and this branch of the
+      // If). Node::has_special_unique_user() will cause this node to
+      // be reprocessed once the dead branch is killed.
+      (always_taken(t) && in(0)->outcnt() == 1)) {
+    // IfNode control
+    return in(0)->in(0);
+  }
+  // no progress
+  return this;
+}
+
+#ifndef PRODUCT
+//-------------------------------related---------------------------------------
+// An IfProjNode's related node set consists of its input (an IfNode) including
+// the IfNode's condition, plus all of its outputs at level 1. In compact mode,
+// the restrictions for IfNode apply (see IfNode::rel).
+void IfProjNode::related(GrowableArray<Node*> *in_rel, GrowableArray<Node*> *out_rel, bool compact) const {
+  Node* ifNode = this->in(0);
+  in_rel->append(ifNode);
+  if (compact) {
+    ifNode->collect_nodes(in_rel, 3, false, true);
+  } else {
+    ifNode->collect_nodes_in_all_data(in_rel, false);
+  }
+  this->collect_nodes(out_rel, -1, false, false);
+}
+
+//------------------------------dump_spec--------------------------------------
+void IfNode::dump_spec(outputStream *st) const {
+  st->print("P=%f, C=%f",_prob,_fcnt);
+}
+
+//-------------------------------related---------------------------------------
+// For an IfNode, the set of related output nodes is just the output nodes till
+// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer.
+// The related input nodes contain no control nodes, but all data nodes
+// pertaining to the condition. In compact mode, the input nodes are collected
+// up to a depth of 3.
+void IfNode::related(GrowableArray <Node *> *in_rel, GrowableArray <Node *> *out_rel, bool compact) const {
+  if (compact) {
+    this->collect_nodes(in_rel, 3, false, true);
+  } else {
+    this->collect_nodes_in_all_data(in_rel, false);
+  }
+  this->collect_nodes(out_rel, -2, false, false);
+}
+#endif
+
+//------------------------------idealize_test----------------------------------
+// Try to canonicalize tests better.  Peek at the Cmp/Bool/If sequence and
+// come up with a canonical sequence.  Bools getting 'eq', 'gt' and 'ge' forms
+// converted to 'ne', 'le' and 'lt' forms.  IfTrue/IfFalse get swapped as
+// needed.
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
+  assert(iff->in(0) != NULL, "If must be live");
+
+  if (iff->outcnt() != 2)  return NULL; // Malformed projections.
+  Node* old_if_f = iff->proj_out(false);
+  Node* old_if_t = iff->proj_out(true);
+
+  // CountedLoopEnds want the back-control test to be TRUE, irregardless of
+  // whether they are testing a 'gt' or 'lt' condition.  The 'gt' condition
+  // happens in count-down loops
+  if (iff->is_CountedLoopEnd())  return NULL;
+  if (!iff->in(1)->is_Bool())  return NULL; // Happens for partially optimized IF tests
+  BoolNode *b = iff->in(1)->as_Bool();
+  BoolTest bt = b->_test;
+  // Test already in good order?
+  if( bt.is_canonical() )
+    return NULL;
+
+  // Flip test to be canonical.  Requires flipping the IfFalse/IfTrue and
+  // cloning the IfNode.
+  Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) );
+  if( !new_b->is_Bool() ) return NULL;
+  b = new_b->as_Bool();
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  assert( igvn, "Test is not canonical in parser?" );
+
+  // The IF node never really changes, but it needs to be cloned
+  iff = iff->clone()->as_If();
+  iff->set_req(1, b);
+  iff->_prob = 1.0-iff->_prob;
+
+  Node *prior = igvn->hash_find_insert(iff);
+  if( prior ) {
+    igvn->remove_dead_node(iff);
+    iff = (IfNode*)prior;
+  } else {
+    // Cannot call transform on it just yet
+    igvn->set_type_bottom(iff);
+  }
+  igvn->_worklist.push(iff);
+
+  // Now handle projections.  Cloning not required.
+  Node* new_if_f = (Node*)(new IfFalseNode( iff ));
+  Node* new_if_t = (Node*)(new IfTrueNode ( iff ));
+
+  igvn->register_new_node_with_optimizer(new_if_f);
+  igvn->register_new_node_with_optimizer(new_if_t);
+  // Flip test, so flip trailing control
+  igvn->replace_node(old_if_f, new_if_t);
+  igvn->replace_node(old_if_t, new_if_f);
+
+  // Progress
+  return iff;
+}
+
+Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* res = Ideal_common(phase, can_reshape);
+  if (res != NodeSentinel) {
+    return res;
+  }
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  // Setup to scan up the CFG looking for a dominating test
+  Node* prev_dom = this;
 
   // Check for range-check vs other kinds of tests
-  Node *index1, *range1;
+  Node* index1;
+  Node* range1;
   jint offset1;
   int flip1 = is_range_check(range1, index1, offset1);
-  if( flip1 ) {
+  if (flip1) {
+    Node* dom = in(0);
     // Try to remove extra range checks.  All 'up_one_dom' gives up at merges
     // so all checks we inspect post-dominate the top-most check we find.
     // If we are going to fail the current check and we reach the top check
@@ -1372,13 +1679,14 @@
 
     // Scan for the top checks and collect range of offsets
     for (int dist = 0; dist < 999; dist++) { // Range-Check scan limit
-      if (dom->Opcode() == Op_If &&  // Not same opcode?
+      if (dom->Opcode() == Op_RangeCheck &&  // Not same opcode?
           prev_dom->in(0) == dom) { // One path of test does dominate?
         if (dom == this) return NULL; // dead loop
         // See if this is a range check
-        Node *index2, *range2;
+        Node* index2;
+        Node* range2;
         jint offset2;
-        int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
+        int flip2 = dom->as_RangeCheck()->is_range_check(range2, index2, offset2);
         // See if this is a _matching_ range check, checking against
         // the same array bounds.
         if (flip2 == flip1 && range2 == range1 && index2 == index1 &&
@@ -1486,237 +1794,14 @@
         prev_dom = rc0.ctl;
       }
     }
-
-  } else {                      // Scan for an equivalent test
-
-    Node *cmp;
-    int dist = 0;               // Cutoff limit for search
-    int op = Opcode();
-    if( op == Op_If &&
-        (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
-      if( cmp->in(2) != NULL && // make sure cmp is not already dead
-          cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
-        dist = 64;              // Limit for null-pointer scans
-      } else {
-        dist = 4;               // Do not bother for random pointer tests
-      }
-    } else {
-      dist = 4;                 // Limit for random junky scans
-    }
-
-    // Normal equivalent-test check.
-    if( !dom ) return NULL;     // Dead loop?
-
-    Node* result = fold_compares(igvn);
-    if (result != NULL) {
-      return result;
-    }
+  } else {
+    prev_dom = search_identical(4);
 
-    // Search up the dominator tree for an If with an identical test
-    while( dom->Opcode() != op    ||  // Not same opcode?
-           dom->in(1)    != in(1) ||  // Not same input 1?
-           (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
-           prev_dom->in(0) != dom ) { // One path of test does not dominate?
-      if( dist < 0 ) return NULL;
-
-      dist--;
-      prev_dom = dom;
-      dom = up_one_dom( dom );
-      if( !dom ) return NULL;
-    }
-
-    // Check that we did not follow a loop back to ourselves
-    if( this == dom )
+    if (prev_dom == NULL) {
       return NULL;
-
-    if( dist > 2 )              // Add to count of NULL checks elided
-      explicit_null_checks_elided++;
-
-  } // End of Else scan for an equivalent test
-
-  // Hit!  Remove this IF
-#ifndef PRODUCT
-  if( TraceIterativeGVN ) {
-    tty->print("   Removing IfNode: "); this->dump();
+    }
   }
-  if( VerifyOpto && !phase->allow_progress() ) {
-    // Found an equivalent dominating test,
-    // we can not guarantee reaching a fix-point for these during iterativeGVN
-    // since intervening nodes may not change.
-    return NULL;
-  }
-#endif
 
   // Replace dominated IfNode
-  dominated_by( prev_dom, igvn );
-
-  // Must return either the original node (now dead) or a new node
-  // (Do not return a top here, since that would break the uniqueness of top.)
-  return new ConINode(TypeInt::ZERO);
-}
-
-//------------------------------dominated_by-----------------------------------
-void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
-  igvn->hash_delete(this);      // Remove self to prevent spurious V-N
-  Node *idom = in(0);
-  // Need opcode to decide which way 'this' test goes
-  int prev_op = prev_dom->Opcode();
-  Node *top = igvn->C->top(); // Shortcut to top
-
-  // Loop predicates may have depending checks which should not
-  // be skipped. For example, range check predicate has two checks
-  // for lower and upper bounds.
-  ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
-  if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL)
-   prev_dom = idom;
-
-  // Now walk the current IfNode's projections.
-  // Loop ends when 'this' has no more uses.
-  for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
-    Node *ifp = last_out(i);     // Get IfTrue/IfFalse
-    igvn->add_users_to_worklist(ifp);
-    // Check which projection it is and set target.
-    // Data-target is either the dominating projection of the same type
-    // or TOP if the dominating projection is of opposite type.
-    // Data-target will be used as the new control edge for the non-CFG
-    // nodes like Casts and Loads.
-    Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
-    // Control-target is just the If's immediate dominator or TOP.
-    Node *ctrl_target = (ifp->Opcode() == prev_op) ?     idom : top;
-
-    // For each child of an IfTrue/IfFalse projection, reroute.
-    // Loop ends when projection has no more uses.
-    for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
-      Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
-      if( !s->depends_only_on_test() ) {
-        // Find the control input matching this def-use edge.
-        // For Regions it may not be in slot 0.
-        uint l;
-        for( l = 0; s->in(l) != ifp; l++ ) { }
-        igvn->replace_input_of(s, l, ctrl_target);
-      } else {                      // Else, for control producers,
-        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
-      }
-    } // End for each child of a projection
-
-    igvn->remove_dead_node(ifp);
-  } // End for each IfTrue/IfFalse child of If
-
-  // Kill the IfNode
-  igvn->remove_dead_node(this);
-}
-
-//------------------------------Identity---------------------------------------
-// If the test is constant & we match, then we are the input Control
-Node *IfProjNode::Identity(PhaseTransform *phase) {
-  // Can only optimize if cannot go the other way
-  const TypeTuple *t = phase->type(in(0))->is_tuple();
-  if (t == TypeTuple::IFNEITHER ||
-      // kill dead branch first otherwise the IfNode's control will
-      // have 2 control uses (the IfNode that doesn't go away because
-      // it still has uses and this branch of the
-      // If). Node::has_special_unique_user() will cause this node to
-      // be reprocessed once the dead branch is killed.
-      (always_taken(t) && in(0)->outcnt() == 1)) {
-    // IfNode control
-    return in(0)->in(0);
-  }
-  // no progress
-  return this;
+  return dominated_by(prev_dom, igvn);
 }
-
-#ifndef PRODUCT
-//-------------------------------related---------------------------------------
-// An IfProjNode's related node set consists of its input (an IfNode) including
-// the IfNode's condition, plus all of its outputs at level 1. In compact mode,
-// the restrictions for IfNode apply (see IfNode::rel).
-void IfProjNode::related(GrowableArray<Node*> *in_rel, GrowableArray<Node*> *out_rel, bool compact) const {
-  Node* ifNode = this->in(0);
-  in_rel->append(ifNode);
-  if (compact) {
-    ifNode->collect_nodes(in_rel, 3, false, true);
-  } else {
-    ifNode->collect_nodes_in_all_data(in_rel, false);
-  }
-  this->collect_nodes(out_rel, -1, false, false);
-}
-
-//------------------------------dump_spec--------------------------------------
-void IfNode::dump_spec(outputStream *st) const {
-  st->print("P=%f, C=%f",_prob,_fcnt);
-}
-
-//-------------------------------related---------------------------------------
-// For an IfNode, the set of related output nodes is just the output nodes till
-// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer.
-// The related input nodes contain no control nodes, but all data nodes
-// pertaining to the condition. In compact mode, the input nodes are collected
-// up to a depth of 3.
-void IfNode::related(GrowableArray <Node *> *in_rel, GrowableArray <Node *> *out_rel, bool compact) const {
-  if (compact) {
-    this->collect_nodes(in_rel, 3, false, true);
-  } else {
-    this->collect_nodes_in_all_data(in_rel, false);
-  }
-  this->collect_nodes(out_rel, -2, false, false);
-}
-#endif
-
-//------------------------------idealize_test----------------------------------
-// Try to canonicalize tests better.  Peek at the Cmp/Bool/If sequence and
-// come up with a canonical sequence.  Bools getting 'eq', 'gt' and 'ge' forms
-// converted to 'ne', 'le' and 'lt' forms.  IfTrue/IfFalse get swapped as
-// needed.
-static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
-  assert(iff->in(0) != NULL, "If must be live");
-
-  if (iff->outcnt() != 2)  return NULL; // Malformed projections.
-  Node* old_if_f = iff->proj_out(false);
-  Node* old_if_t = iff->proj_out(true);
-
-  // CountedLoopEnds want the back-control test to be TRUE, irregardless of
-  // whether they are testing a 'gt' or 'lt' condition.  The 'gt' condition
-  // happens in count-down loops
-  if (iff->is_CountedLoopEnd())  return NULL;
-  if (!iff->in(1)->is_Bool())  return NULL; // Happens for partially optimized IF tests
-  BoolNode *b = iff->in(1)->as_Bool();
-  BoolTest bt = b->_test;
-  // Test already in good order?
-  if( bt.is_canonical() )
-    return NULL;
-
-  // Flip test to be canonical.  Requires flipping the IfFalse/IfTrue and
-  // cloning the IfNode.
-  Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) );
-  if( !new_b->is_Bool() ) return NULL;
-  b = new_b->as_Bool();
-
-  PhaseIterGVN *igvn = phase->is_IterGVN();
-  assert( igvn, "Test is not canonical in parser?" );
-
-  // The IF node never really changes, but it needs to be cloned
-  iff = new IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);
-
-  Node *prior = igvn->hash_find_insert(iff);
-  if( prior ) {
-    igvn->remove_dead_node(iff);
-    iff = (IfNode*)prior;
-  } else {
-    // Cannot call transform on it just yet
-    igvn->set_type_bottom(iff);
-  }
-  igvn->_worklist.push(iff);
-
-  // Now handle projections.  Cloning not required.
-  Node* new_if_f = (Node*)(new IfFalseNode( iff ));
-  Node* new_if_t = (Node*)(new IfTrueNode ( iff ));
-
-  igvn->register_new_node_with_optimizer(new_if_f);
-  igvn->register_new_node_with_optimizer(new_if_t);
-  // Flip test, so flip trailing control
-  igvn->replace_node(old_if_f, new_if_t);
-  igvn->replace_node(old_if_t, new_if_f);
-
-  // Progress
-  return iff;
-}

--- a/hotspot/src/share/vm/opto/lcm.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/lcm.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -498,9 +498,13 @@
       continue;
 
     // Schedule IV increment last.
-    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
-        e->in(1)->in(1) == n && n->is_iteratively_computed())
-      continue;
+    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd) {
+      // Cmp might be matched into CountedLoopEnd node.
+      Node *cmp = (e->in(1)->ideal_reg() == Op_RegFlags) ? e->in(1) : e;
+      if (cmp->req() > 1 && cmp->in(1) == n && n->is_iteratively_computed()) {
+        continue;
+      }
+    }
 
     uint n_choice  = 2;

--- a/hotspot/src/share/vm/opto/library_call.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -238,7 +238,7 @@
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
   void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
-  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
+  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned);
   static bool klass_needs_init_guard(Node* kls);
   bool inline_unsafe_allocate();
   bool inline_unsafe_copyMemory();
@@ -256,6 +256,7 @@
   bool inline_native_getLength();
   bool inline_array_copyOf(bool is_copyOfRange);
   bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
+  bool inline_objects_checkIndex();
   void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
   bool inline_native_clone(bool is_virtual);
   bool inline_native_Reflection_getCallerClass();
@@ -544,72 +545,72 @@
   case vmIntrinsics::_inflateStringC:
   case vmIntrinsics::_inflateStringB:           return inline_string_copy(!is_compress);
 
-  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile);
-  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile);
-  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile);
-  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile);
-  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
-
-  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile);
-
-  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile);
-
-  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile);
-  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile);
-  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile);
-  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile);
-  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile);
-  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile);
-  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile);
-  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile);
-  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile);
-
-  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile);
-  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile);
-  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile);
-  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile);
-  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile);
-  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile);
-  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile);
-  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile);
-  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile);
-
-  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
-
-  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile, false);
+  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false);
+  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile, false);
+  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile, false);
+  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
+
+  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false);
+
+  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile, false);
+
+  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile, false);
+  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile, false);
+  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile, false);
+  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile, false);
+  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile, false);
+  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile, false);
+  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile, false);
+  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile, false);
+  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile, false);
+
+  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile, false);
+  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile, false);
+  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile, false);
+  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile, false);
+  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile, false);
+  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile, false);
+  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile, false);
+  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile, false);
+  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile, false);
+
+  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, true);
+  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, true);
+  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, true);
+  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, true);
+
+  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, true);
+  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, true);
+  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, true);
+  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, true);
 
   case vmIntrinsics::_compareAndSwapObject:     return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
   case vmIntrinsics::_compareAndSwapInt:        return inline_unsafe_load_store(T_INT,    LS_cmpxchg);
@@ -647,6 +648,7 @@
   case vmIntrinsics::_copyOfRange:              return inline_array_copyOf(true);
   case vmIntrinsics::_equalsB:                  return inline_array_equals(StrIntrinsicNode::LL);
   case vmIntrinsics::_equalsC:                  return inline_array_equals(StrIntrinsicNode::UU);
+  case vmIntrinsics::_Objects_checkIndex:       return inline_objects_checkIndex();
   case vmIntrinsics::_clone:                    return inline_native_clone(intrinsic()->is_virtual());
 
   case vmIntrinsics::_isAssignableFrom:         return inline_native_subtype_check();
@@ -1045,6 +1047,54 @@
   return true;
 }
 
+bool LibraryCallKit::inline_objects_checkIndex() {
+  Node* index = argument(0);
+  Node* length = argument(1);
+  if (too_many_traps(Deoptimization::Reason_intrinsic) || too_many_traps(Deoptimization::Reason_range_check)) {
+    return false;
+  }
+
+  Node* len_pos_cmp = _gvn.transform(new CmpINode(length, intcon(0)));
+  Node* len_pos_bol = _gvn.transform(new BoolNode(len_pos_cmp, BoolTest::ge));
+
+  {
+    BuildCutout unless(this, len_pos_bol, PROB_MAX);
+    uncommon_trap(Deoptimization::Reason_intrinsic,
+                  Deoptimization::Action_make_not_entrant);
+  }
+
+  if (stopped()) {
+    return false;
+  }
+
+  Node* rc_cmp = _gvn.transform(new CmpUNode(index, length));
+  BoolTest::mask btest = BoolTest::lt;
+  Node* rc_bool = _gvn.transform(new BoolNode(rc_cmp, btest));
+  RangeCheckNode* rc = new RangeCheckNode(control(), rc_bool, PROB_MAX, COUNT_UNKNOWN);
+  _gvn.set_type(rc, rc->Value(&_gvn));
+  if (!rc_bool->is_Con()) {
+    record_for_igvn(rc);
+  }
+  set_control(_gvn.transform(new IfTrueNode(rc)));
+  {
+    PreserveJVMState pjvms(this);
+    set_control(_gvn.transform(new IfFalseNode(rc)));
+    uncommon_trap(Deoptimization::Reason_range_check,
+                  Deoptimization::Action_make_not_entrant);
+  }
+
+  if (stopped()) {
+    return false;
+  }
+
+  Node* result = new CastIINode(index, TypeInt::make(0, _gvn.type(length)->is_int()->_hi, Type::WidenMax));
+  result->set_req(0, control());
+  result = _gvn.transform(result);
+  set_result(result);
+  replace_in_map(index, result);
+  return true;
+}
+
 //------------------------------inline_string_indexOf------------------------
 bool LibraryCallKit::inline_string_indexOf(StrIntrinsicNode::ArgEnc ae) {
   if (!Matcher::has_match_rule(Op_StrIndexOf) || !UseSSE42Intrinsics) {
@@ -1453,9 +1503,11 @@
 
   Node* adr = array_element_address(value, index, T_CHAR);
   if (is_store) {
-    (void) store_to_memory(control(), adr, ch, T_CHAR, TypeAryPtr::BYTES, MemNode::unordered);
+    (void) store_to_memory(control(), adr, ch, T_CHAR, TypeAryPtr::BYTES, MemNode::unordered,
+                           false, false, true /* mismatched */);
   } else {
-    ch = make_load(control(), adr, TypeInt::CHAR, T_CHAR, MemNode::unordered);
+    ch = make_load(control(), adr, TypeInt::CHAR, T_CHAR, MemNode::unordered,
+                   LoadNode::DependsOnlyOnTest, false, false, true /* mismatched */);
     set_result(ch);
   }
   return true;
@@ -2385,7 +2437,7 @@
   return NULL;
 }
 
-bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
+bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) {
   if (callee()->is_static())  return false;  // caller must have the capability!
 
 #ifndef PRODUCT
@@ -2527,7 +2579,28 @@
   // of safe & unsafe memory.
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
 
-   if (!is_store) {
+  assert(alias_type->adr_type() == TypeRawPtr::BOTTOM || alias_type->adr_type() == TypeOopPtr::BOTTOM ||
+         alias_type->field() != NULL || alias_type->element() != NULL, "field, array element or unknown");
+  bool mismatched = false;
+  if (alias_type->element() != NULL || alias_type->field() != NULL) {
+    BasicType bt;
+    if (alias_type->element() != NULL) {
+      const Type* element = alias_type->element();
+      bt = element->isa_narrowoop() ? T_OBJECT : element->array_element_basic_type();
+    } else {
+      bt = alias_type->field()->type()->basic_type();
+    }
+    if (bt == T_ARRAY) {
+      // accessing an array field with getObject is not a mismatch
+      bt = T_OBJECT;
+    }
+    if (bt != type) {
+      mismatched = true;
+    }
+  }
+  assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");
+
+  if (!is_store) {
     Node* p = NULL;
     // Try to constant fold a load from a constant field
     ciField* field = alias_type->field();
@@ -2543,7 +2616,7 @@
       MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
       // To be valid, unsafe loads may depend on other conditions than
       // the one that guards them: pin the Load node
-      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile);
+      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched);
       // load value
       switch (type) {
       case T_BOOLEAN:
@@ -2590,12 +2663,12 @@
 
     MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
     if (type != T_OBJECT ) {
-      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile);
+      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched);
     } else {
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
         // oop to Java heap.
-        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
+        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
       } else {
         // We can't tell at compile time if we are storing in the Java heap or outside
         // of it. So we need to emit code to conditionally do the proper type of
@@ -2607,11 +2680,11 @@
         __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
           // Sync IdealKit and graphKit.
           sync_kit(ideal);
-          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
           // Update IdealKit memory.
           __ sync_kit(this);
         } __ else_(); {
-          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile);
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile, mismatched);
         } __ end_if();
         // Final sync IdealKit and GraphKit.
         final_sync(ideal);

--- a/hotspot/src/share/vm/opto/loopPredicate.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopPredicate.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -91,7 +91,8 @@
 // The true projecttion (if_cont) of the new_iff is returned.
 // This code is also used to clone predicates to cloned loops.
 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                                      Deoptimization::DeoptReason reason) {
+                                                      Deoptimization::DeoptReason reason,
+                                                      int opcode) {
   assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!");
   IfNode* iff = cont_proj->in(0)->as_If();
 
@@ -133,8 +134,13 @@
   }
   // Create new_iff
   IdealLoopTree* lp = get_loop(entry);
-  IfNode *new_iff = iff->clone()->as_If();
-  new_iff->set_req(0, entry);
+  IfNode* new_iff = NULL;
+  if (opcode == Op_If) {
+    new_iff = new IfNode(entry, iff->in(1), iff->_prob, iff->_fcnt);
+  } else {
+    assert(opcode == Op_RangeCheck, "no other if variant here");
+    new_iff = new RangeCheckNode(entry, iff->in(1), iff->_prob, iff->_fcnt);
+  }
   register_control(new_iff, lp, entry);
   Node *if_cont = new IfTrueNode(new_iff);
   Node *if_uct  = new IfFalseNode(new_iff);
@@ -183,7 +189,8 @@
 //------------------------------create_new_if_for_predicate------------------------
 // Create a new if below new_entry for the predicate to be cloned (IGVN optimization)
 ProjNode* PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                                    Deoptimization::DeoptReason reason) {
+                                                    Deoptimization::DeoptReason reason,
+                                                    int opcode) {
   assert(new_entry != 0, "only used for clone predicate");
   assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!");
   IfNode* iff = cont_proj->in(0)->as_If();
@@ -208,8 +215,13 @@
   }
 
   // Create new_iff in new location.
-  IfNode *new_iff = iff->clone()->as_If();
-  new_iff->set_req(0, new_entry);
+  IfNode* new_iff = NULL;
+  if (opcode == Op_If) {
+    new_iff = new IfNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt);
+  } else {
+    assert(opcode == Op_RangeCheck, "no other if variant here");
+    new_iff = new RangeCheckNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt);
+  }
 
   register_new_node_with_optimizer(new_iff);
   Node *if_cont = new IfTrueNode(new_iff);
@@ -249,9 +261,9 @@
                                           PhaseIterGVN* igvn) {
   ProjNode* new_predicate_proj;
   if (loop_phase != NULL) {
-    new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason);
+    new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If);
   } else {
-    new_predicate_proj =       igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason);
+    new_predicate_proj =       igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If);
   }
   IfNode* iff = new_predicate_proj->in(0)->as_If();
   Node* ctrl  = iff->in(0);
@@ -557,7 +569,7 @@
     return false;
   }
   Node* range = cmp->in(2);
-  if (range->Opcode() != Op_LoadRange) {
+  if (range->Opcode() != Op_LoadRange && !iff->is_RangeCheck()) {
     const TypeInt* tint = phase->_igvn.type(range)->isa_int();
     if (tint == NULL || tint->empty() || tint->_lo < 0) {
       // Allow predication on positive values that aren't LoadRanges.
@@ -714,7 +726,8 @@
   while (current_proj != head) {
     if (loop == get_loop(current_proj) && // still in the loop ?
         current_proj->is_Proj()        && // is a projection  ?
-        current_proj->in(0)->Opcode() == Op_If) { // is a if projection ?
+        (current_proj->in(0)->Opcode() == Op_If ||
+         current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
       if_proj_list.push(current_proj);
     }
     current_proj = idom(current_proj);
@@ -753,7 +766,8 @@
     if (invar.is_invariant(bol)) {
       // Invariant test
       new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
-                                                       Deoptimization::Reason_predicate);
+                                                       Deoptimization::Reason_predicate,
+                                                       iff->Opcode());
       Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
       BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
 
@@ -797,8 +811,8 @@
       // lower_bound test will dominate the upper bound test and all
       // cloned or created nodes will use the lower bound test as
       // their declared control.
-      ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate);
-      ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate);
+      ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode());
+      ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode());
       assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
       Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0);

--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -290,7 +290,7 @@
       if (ctrl->is_top())
         return false;           // Found dead test on live IF?  No peeling!
       // Standard IF only has one input value to check for loop invariance
-      assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+      assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
       // Condition is not a member of this loop?
       if( !is_member(phase->get_loop(ctrl)) &&
           is_loop_exit(test) )
@@ -792,8 +792,10 @@
     return false;
   }
 
-  if(cl->do_unroll_only()) {
-    NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
+  if (cl->do_unroll_only()) {
+    if (TraceSuperWordLoopUnrollAnalysis) {
+      tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct);
+    }
   }
 
   // Unroll once!  (Each trip will soon do double iterations)
@@ -818,7 +820,9 @@
       if (slp_max_unroll_factor >= future_unroll_ct) {
         int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
         if (new_limit > LoopUnrollLimit) {
-          NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));
+          if (TraceSuperWordLoopUnrollAnalysis) {
+            tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
+          }
           _local_loop_unroll_limit = new_limit;
         }
       }
@@ -856,7 +860,8 @@
   // loop-invariant.
   for (uint i = 0; i < _body.size(); i++) {
     Node *iff = _body[i];
-    if (iff->Opcode() == Op_If) { // Test?
+    if (iff->Opcode() == Op_If ||
+        iff->Opcode() == Op_RangeCheck) { // Test?
 
       // Comparing trip+off vs limit
       Node *bol = iff->in(1);
@@ -2035,8 +2040,8 @@
   // loop-invariant.
   for( uint i = 0; i < loop->_body.size(); i++ ) {
     Node *iff = loop->_body[i];
-    if( iff->Opcode() == Op_If ) { // Test?
-
+    if (iff->Opcode() == Op_If ||
+        iff->Opcode() == Op_RangeCheck) { // Test?
       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
       // we need loop unswitching instead of iteration splitting.
       Node *exit = loop->is_loop_exit(iff);
@@ -2119,10 +2124,9 @@
             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
           }
         } else {
-#ifndef PRODUCT
-          if( PrintOpto )
+          if (PrintOpto) {
             tty->print_cr("missed RCE opportunity");
-#endif
+          }
           continue;             // In release mode, ignore it
         }
       } else {                  // Otherwise work on normal compares
@@ -2157,10 +2161,9 @@
           }
           break;
         default:
-#ifndef PRODUCT
-          if( PrintOpto )
+          if (PrintOpto) {
             tty->print_cr("missed RCE opportunity");
-#endif
+          }
           continue;             // Unhandled case
         }
       }
@@ -2504,9 +2507,7 @@
       return false;
     }
     if (should_peel) {            // Should we peel?
-#ifndef PRODUCT
-      if (PrintOpto) tty->print_cr("should_peel");
-#endif
+      if (PrintOpto) { tty->print_cr("should_peel"); }
       phase->do_peeling(this,old_new);
     } else if (should_unswitch) {
       phase->do_unswitching(this, old_new);

--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -132,7 +132,7 @@
     head->as_CountedLoop()->set_normal_loop();
   }
 
-  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);
+  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode());
 
 #ifdef ASSERT
   Node* uniqc = proj_true->unique_ctrl_out();
@@ -222,7 +222,8 @@
 // and inserting an if to select fast-slow versions.
 // Return control projection of the entry to the fast version.
 ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
-                                                      Node_List &old_new) {
+                                                      Node_List &old_new,
+                                                      int opcode) {
   LoopNode* head  = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
   Node*     entry = head->in(LoopNode::EntryControl);
@@ -235,7 +236,8 @@
   register_node(opq, outer_loop, entry, dom_depth(entry));
   Node *bol       = new Conv2BNode(opq);
   register_node(bol, outer_loop, entry, dom_depth(entry));
-  IfNode* iff = new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
+  IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) :
+    new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
   register_node(iff, outer_loop, entry, dom_depth(entry));
   ProjNode* iffast = new IfTrueNode(iff);
   register_node(iffast, outer_loop, iff, dom_depth(iff));
@@ -359,16 +361,22 @@
   }
 
   if(!_lpt->_head->is_CountedLoop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);
+    }
     return false;
   }
   CountedLoopNode *cl = _lpt->_head->as_CountedLoop();
   if (!cl->is_valid_counted_loop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);
+    }
     return false; // skip malformed counted loop
   }
   if (!cl->is_main_loop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);
+    }
     return false; // skip normal, pre, and post loops
   }

--- a/hotspot/src/share/vm/opto/loopnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -329,6 +329,9 @@
 
   Node* phi_incr = NULL;
   // Trip-counter increment must be commutative & associative.
+  if (incr->Opcode() == Op_CastII) {
+    incr = incr->in(1);
+  }
   if (incr->is_Phi()) {
     if (incr->as_Phi()->region() != x || incr->req() != 3)
       return false; // Not simple trip counter expression
@@ -356,6 +359,9 @@
     xphi = stride;
     stride = tmp;
   }
+  if (xphi->Opcode() == Op_CastII) {
+    xphi = xphi->in(1);
+  }
   // Stride must be constant
   int stride_con = stride->get_int();
   if (stride_con == 0)
@@ -2397,11 +2403,9 @@
   // After that switch predicates off and do more loop optimizations.
   if (!C->major_progress() && (C->predicate_count() > 0)) {
      C->cleanup_loop_predicates(_igvn);
-#ifndef PRODUCT
      if (TraceLoopOpts) {
        tty->print_cr("PredicatesOff");
      }
-#endif
      C->set_major_progress();
   }

--- a/hotspot/src/share/vm/opto/loopnode.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -916,7 +916,8 @@
 
   // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
   ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                        Deoptimization::DeoptReason reason);
+                                        Deoptimization::DeoptReason reason,
+                                        int opcode);
   void register_control(Node* n, IdealLoopTree *loop, Node* pred);
 
   // Clone loop predicates to cloned loops (peeled, unswitched)
@@ -966,7 +967,8 @@
   // Create a slow version of the loop by cloning the loop
   // and inserting an if to select fast-slow versions.
   ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
-                                        Node_List &old_new);
+                                        Node_List &old_new,
+                                        int opcode);
 
   // Clone a loop and return the clone head (clone_loop_head).
   // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,

--- a/hotspot/src/share/vm/opto/loopopts.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -199,14 +199,11 @@
 // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 // live path up to the dominating control.
 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
-#ifndef PRODUCT
-  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
-#endif
-
+  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
 
   // prevdom is the dominating projection of the dominating test.
   assert( iff->is_If(), "" );
-  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+  assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
   int pop = prevdom->Opcode();
   assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
   if (flip) {
@@ -617,9 +614,7 @@
       }
     }
     if (phi == NULL)  break;
-#ifndef PRODUCT
-    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
-#endif
+    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
     // Move speculative ops
     for (uint j = 1; j < region->req(); j++) {
       Node *proj = region->in(j);
@@ -963,10 +958,9 @@
   }
   int nodes_left = C->max_node_limit() - C->live_nodes();
   if (weight * 8 > nodes_left) {
-#ifndef PRODUCT
-    if (PrintOpto)
+    if (PrintOpto) {
       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
-#endif
+    }
     return true;
   } else {
     return false;
@@ -1123,7 +1117,8 @@
   int n_op = n->Opcode();
 
   // Check for an IF being dominated by another IF same test
-  if (n_op == Op_If) {
+  if (n_op == Op_If ||
+      n_op == Op_RangeCheck) {
     Node *bol = n->in(1);
     uint max = bol->outcnt();
     // Check for same test used more than once?
@@ -1489,14 +1484,12 @@
 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                  Node* side_by_side_idom) {
 
-#ifndef PRODUCT
   if (C->do_vector_loop() && PrintOpto) {
     const char* mname = C->method()->name()->as_quoted_ascii();
     if (mname != NULL) {
       tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
     }
   }
-#endif
 
   CloneMap& cm = C->clone_map();
   Dict* dict = cm.dict();
@@ -1945,7 +1938,10 @@
   BoolNode* bol = new BoolNode(cmp, relop);
   register_node(bol, loop, proj2, ddepth);
 
-  IfNode* new_if = new IfNode(proj2, bol, iff->_prob, iff->_fcnt);
+  int opcode = iff->Opcode();
+  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
+  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
+    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
   register_node(new_if, loop, proj2, ddepth);
 
   proj->set_req(0, new_if); // reattach

--- a/hotspot/src/share/vm/opto/machnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/machnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -433,36 +433,49 @@
   if (is_MachTemp()) return true;
 
   uint r = rule();              // Match rule
-  if( r <  Matcher::_begin_rematerialize ||
-      r >= Matcher::_end_rematerialize )
+  if (r <  Matcher::_begin_rematerialize ||
+      r >= Matcher::_end_rematerialize) {
     return false;
+  }
 
   // For 2-address instructions, the input live range is also the output
-  // live range.  Remateralizing does not make progress on the that live range.
-  if( two_adr() )  return false;
+  // live range. Remateralizing does not make progress on the that live range.
+  if (two_adr()) return false;
 
   // Check for rematerializing float constants, or not
-  if( !Matcher::rematerialize_float_constants ) {
+  if (!Matcher::rematerialize_float_constants) {
     int op = ideal_Opcode();
-    if( op == Op_ConF || op == Op_ConD )
+    if (op == Op_ConF || op == Op_ConD) {
       return false;
+    }
+  }
+
+  // Defining flags - can't spill these! Must remateralize.
+  if (ideal_reg() == Op_RegFlags) {
+    return true;
   }
 
-  // Defining flags - can't spill these!  Must remateralize.
-  if( ideal_reg() == Op_RegFlags )
-    return true;
+  // Stretching lots of inputs - don't do it.
+  if (req() > 2) {
+    return false;
+  }
 
-  // Stretching lots of inputs - don't do it.
-  if( req() > 2 )
-    return false;
+  if (req() == 2 && in(1) && in(1)->ideal_reg() == Op_RegFlags) {
+    // In(1) will be rematerialized, too.
+    // Stretching lots of inputs - don't do it.
+    if (in(1)->req() > 2) {
+      return false;
+    }
+  }
 
   // Don't remateralize somebody with bound inputs - it stretches a
   // fixed register lifetime.
   uint idx = oper_input_base();
   if (req() > idx) {
     const RegMask &rm = in_RegMask(idx);
-    if (rm.is_bound(ideal_reg()))
+    if (rm.is_bound(ideal_reg())) {
       return false;
+    }
   }
 
   return true;

--- a/hotspot/src/share/vm/opto/machnode.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/machnode.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -578,8 +578,8 @@
 
 
 #ifndef PRODUCT
-  virtual const char *Name() const {
-    switch (_spill_type) {
+  static const char *spill_type(SpillType st) {
+    switch (st) {
       case TwoAddress:
         return "TwoAddressSpillCopy";
       case PhiInput:
@@ -612,6 +612,10 @@
     }
   }
 
+  virtual const char *Name() const {
+    return spill_type(_spill_type);
+  }
+
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
 #endif
 };

--- a/hotspot/src/share/vm/opto/matcher.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1569,13 +1569,11 @@
         // Can NOT include the match of a subtree when its memory state
         // is used by any of the other subtrees
         (input_mem == NodeSentinel) ) {
-#ifndef PRODUCT
       // Print when we exclude matching due to different memory states at input-loads
-      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
-        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
+      if (PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
+        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem)) {
         tty->print_cr("invalid input_mem");
       }
-#endif
       // Switch to a register-only opcode; this value must be in a register
       // and cannot be subsumed as part of a larger instruction.
       s->DFA( m->ideal_reg(), m );

--- a/hotspot/src/share/vm/opto/matcher.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -269,6 +269,10 @@
   // should generate this one.
   static const bool match_rule_supported(int opcode);
 
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  static const bool match_rule_supported_vector(int opcode, int vlen);
+
   // Some uarchs have different sized float register resources
   static const int float_pressure(int default_pressure_threshold);

--- a/hotspot/src/share/vm/opto/memnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -72,8 +72,15 @@
   dump_adr_type(this, _adr_type, st);
 
   Compile* C = Compile::current();
-  if( C->alias_type(_adr_type)->is_volatile() )
+  if (C->alias_type(_adr_type)->is_volatile()) {
     st->print(" Volatile!");
+  }
+  if (_unaligned_access) {
+    st->print(" unaligned");
+  }
+  if (_mismatched_access) {
+    st->print(" mismatched");
+  }
 }
 
 void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
@@ -754,7 +761,8 @@
 
 //----------------------------LoadNode::make-----------------------------------
 // Polymorphic factory method:
-Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, ControlDependency control_dependency) {
+Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo,
+                     ControlDependency control_dependency, bool unaligned, bool mismatched) {
   Compile* C = gvn.C;
 
   // sanity check the alias category against the created node type
@@ -769,40 +777,68 @@
           // oop will be recorded in oop map if load crosses safepoint
           rt->isa_oopptr() || is_immutable_value(adr),
           "raw memory operations should have control edge");
+  LoadNode* load = NULL;
   switch (bt) {
-  case T_BOOLEAN: return new LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
-  case T_BYTE:    return new LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
-  case T_INT:     return new LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
-  case T_CHAR:    return new LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
-  case T_SHORT:   return new LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
-  case T_LONG:    return new LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency);
-  case T_FLOAT:   return new LoadFNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
-  case T_DOUBLE:  return new LoadDNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
-  case T_ADDRESS: return new LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo, control_dependency);
+  case T_BOOLEAN: load = new LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency); break;
+  case T_BYTE:    load = new LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency); break;
+  case T_INT:     load = new LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency); break;
+  case T_CHAR:    load = new LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency); break;
+  case T_SHORT:   load = new LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency); break;
+  case T_LONG:    load = new LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency); break;
+  case T_FLOAT:   load = new LoadFNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency); break;
+  case T_DOUBLE:  load = new LoadDNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency); break;
+  case T_ADDRESS: load = new LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo, control_dependency); break;
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
-      Node* load  = gvn.transform(new LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency));
-      return new DecodeNNode(load, load->bottom_type()->make_ptr());
+      load = new LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency);
     } else
 #endif
     {
       assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
-      return new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
+      load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
     }
+    break;
+  }
+  assert(load != NULL, "LoadNode should have been created");
+  if (unaligned) {
+    load->set_unaligned_access();
   }
-  ShouldNotReachHere();
-  return (LoadNode*)NULL;
+  if (mismatched) {
+    load->set_mismatched_access();
+  }
+  if (load->Opcode() == Op_LoadN) {
+    Node* ld = gvn.transform(load);
+    return new DecodeNNode(ld, ld->bottom_type()->make_ptr());
+  }
+
+  return load;
 }
 
-LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
+LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo,
+                                  ControlDependency control_dependency, bool unaligned, bool mismatched) {
   bool require_atomic = true;
-  return new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic);
+  LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic);
+  if (unaligned) {
+    load->set_unaligned_access();
+  }
+  if (mismatched) {
+    load->set_mismatched_access();
+  }
+  return load;
 }
 
-LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
+LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo,
+                                  ControlDependency control_dependency, bool unaligned, bool mismatched) {
   bool require_atomic = true;
-  return new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic);
+  LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic);
+  if (unaligned) {
+    load->set_unaligned_access();
+  }
+  if (mismatched) {
+    load->set_mismatched_access();
+  }
+  return load;
 }
 
 
@@ -2393,7 +2429,8 @@
              st->Opcode() == Op_StoreVector ||
              Opcode() == Op_StoreVector ||
              phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
-             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode
+             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
+             (is_mismatched_access() || st->as_Store()->is_mismatched_access()),
              "no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]);
 
       if (st->in(MemNode::Address)->eqv_uncast(address) &&
@@ -3213,6 +3250,9 @@
 // within the initialized memory.
 intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) {
   const int FAIL = 0;
+  if (st->is_unaligned_access()) {
+    return FAIL;
+  }
   if (st->req() != MemNode::ValueIn + 1)
     return FAIL;                // an inscrutable StoreNode (card mark?)
   Node* ctl = st->in(MemNode::Control);

--- a/hotspot/src/share/vm/opto/memnode.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -39,11 +39,14 @@
 //------------------------------MemNode----------------------------------------
 // Load or Store, possibly throwing a NULL pointer exception
 class MemNode : public Node {
+private:
+  bool _unaligned_access; // Unaligned access from unsafe
+  bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance
 protected:
 #ifdef ASSERT
   const TypePtr* _adr_type;     // What kind of memory is being addressed?
 #endif
-  virtual uint size_of() const; // Size is bigger (ASSERT only)
+  virtual uint size_of() const;
 public:
   enum { Control,               // When is it safe to do this load?
          Memory,                // Chunk of memory is being loaded from
@@ -57,17 +60,17 @@
   } MemOrd;
 protected:
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
-    : Node(c0,c1,c2   ) {
+    : Node(c0,c1,c2   ), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
-    : Node(c0,c1,c2,c3) {
+    : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
-    : Node(c0,c1,c2,c3,c4) {
+    : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
@@ -127,6 +130,11 @@
   // the given memory state?  (The state may or may not be in(Memory).)
   Node* can_see_stored_value(Node* st, PhaseTransform* phase) const;
 
+  void set_unaligned_access() { _unaligned_access = true; }
+  bool is_unaligned_access() const { return _unaligned_access; }
+  void set_mismatched_access() { _mismatched_access = true; }
+  bool is_mismatched_access() const { return _mismatched_access; }
+
 #ifndef PRODUCT
   static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st);
   virtual void dump_spec(outputStream *st) const;
@@ -190,9 +198,10 @@
   }
 
   // Polymorphic factory method:
-   static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                     const TypePtr* at, const Type *rt, BasicType bt,
-                     MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
+  static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
+                    const TypePtr* at, const Type *rt, BasicType bt,
+                    MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest,
+                    bool unaligned = false, bool mismatched = false);
 
   virtual uint hash()   const;  // Check the type
 
@@ -367,7 +376,8 @@
   virtual BasicType memory_type() const { return T_LONG; }
   bool require_atomic_access() const { return _require_atomic_access; }
   static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
-                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
+                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest,
+                                bool unaligned = false, bool mismatched = false);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     LoadNode::dump_spec(st);
@@ -418,7 +428,8 @@
   virtual BasicType memory_type() const { return T_DOUBLE; }
   bool require_atomic_access() const { return _require_atomic_access; }
   static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
-                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
+                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest,
+                                bool unaligned = false, bool mismatched = false);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     LoadNode::dump_spec(st);

--- a/hotspot/src/share/vm/opto/movenode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/movenode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -230,9 +230,7 @@
 
   // Convert to a bool (flipped)
   // Build int->bool conversion
-#ifndef PRODUCT
-  if( PrintOpto ) tty->print_cr("CMOV to I2B");
-#endif
+  if (PrintOpto) { tty->print_cr("CMOV to I2B"); }
   Node *n = new Conv2BNode( cmp->in(1) );
   if( flip )
   n = new XorINode( phase->transform(n), phase->intcon(1) );

--- a/hotspot/src/share/vm/opto/multnode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/multnode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -44,14 +44,14 @@
 //------------------------------proj_out---------------------------------------
 // Get a named projection
 ProjNode* MultiNode::proj_out(uint which_proj) const {
-  assert(Opcode() != Op_If || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
-  assert(Opcode() != Op_If || outcnt() == 2, "bad if #1");
+  assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
+  assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || outcnt() == 2, "bad if #1");
   for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
     Node *p = fast_out(i);
     if (p->is_Proj()) {
       ProjNode *proj = p->as_Proj();
       if (proj->_con == which_proj) {
-        assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
+        assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || proj->Opcode() == (which_proj ? Op_IfTrue : Op_IfFalse), "bad if #2");
         return proj;
       }
     } else {

--- a/hotspot/src/share/vm/opto/node.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/node.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -125,6 +125,7 @@
 class PhiNode;
 class Pipeline;
 class ProjNode;
+class RangeCheckNode;
 class RegMask;
 class RegionNode;
 class RootNode;
@@ -584,6 +585,7 @@
           DEFINE_CLASS_ID(Jump,        PCTable, 1)
         DEFINE_CLASS_ID(If,          MultiBranch, 1)
           DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
+          DEFINE_CLASS_ID(RangeCheck, If, 1)
         DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
       DEFINE_CLASS_ID(Start,       Multi, 2)
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
@@ -758,6 +760,7 @@
   DEFINE_CLASS_QUERY(FastLock)
   DEFINE_CLASS_QUERY(FastUnlock)
   DEFINE_CLASS_QUERY(If)
+  DEFINE_CLASS_QUERY(RangeCheck)
   DEFINE_CLASS_QUERY(IfFalse)
   DEFINE_CLASS_QUERY(IfTrue)
   DEFINE_CLASS_QUERY(Initialize)

--- a/hotspot/src/share/vm/opto/output.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/output.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -91,13 +91,10 @@
   }
 
   // Break before main entry point
-  if( (_method && C->directive()->BreakAtExecuteOption)
-#ifndef PRODUCT
-    ||(OptoBreakpoint && is_method_compilation())
-    ||(OptoBreakpointOSR && is_osr_compilation())
-    ||(OptoBreakpointC2R && !_method)
-#endif
-    ) {
+  if ((_method && C->directive()->BreakAtExecuteOption) ||
+      (OptoBreakpoint && is_method_compilation())       ||
+      (OptoBreakpointOSR && is_osr_compilation())       ||
+      (OptoBreakpointC2R && !_method)                   ) {
     // checking for _method means that OptoBreakpoint does not apply to
     // runtime stubs or frame converters
     _cfg->insert( entry, 1, new MachBreakpointNode() );

--- a/hotspot/src/share/vm/opto/parse1.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -958,25 +958,22 @@
            PPC64_ONLY(wrote_volatile() ||)
            (AlwaysSafeConstructors && wrote_fields()))) {
     _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name();
       tty->print_cr(" writes finals and needs a memory barrier");
     }
-#endif
   }
 
-  // Any method can write a @Stable field; insert memory barriers after
-  // those also. If there is a predecessor allocation node, bind the
-  // barrier there.
+  // Any method can write a @Stable field; insert memory barriers
+  // after those also. Can't bind predecessor allocation node (if any)
+  // with barrier because allocation doesn't always dominate
+  // MemBarRelease.
   if (wrote_stable()) {
-    _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
-#ifndef PRODUCT
+    _exits.insert_mem_bar(Op_MemBarRelease);
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name();
       tty->print_cr(" writes @Stable and needs a memory barrier");
     }
-#endif
   }
 
   for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) {
@@ -991,13 +988,18 @@
       // In case of concurrent class loading, the type we set for the
       // ret_phi in build_exits() may have been too optimistic and the
       // ret_phi may be top now.
-#ifdef ASSERT
+      // Otherwise, we've encountered an error and have to mark the method as
+      // not compilable. Just using an assertion instead would be dangerous
+      // as this could lead to an infinite compile loop in non-debug builds.
       {
         MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
-        assert(ret_type->isa_ptr() && C->env()->system_dictionary_modification_counter_changed(), "return value must be well defined");
+        if (C->env()->system_dictionary_modification_counter_changed()) {
+          C->record_failure(C2Compiler::retry_class_loading_during_parsing());
+        } else {
+          C->record_method_not_compilable("Can't determine return type.");
+        }
       }
-#endif
-      C->record_failure(C2Compiler::retry_class_loading_during_parsing());
+      return;
     }
     _exits.push_node(ret_type->basic_type(), ret_phi);
   }
@@ -2147,15 +2149,24 @@
     // here.
     Node* phi = _exits.argument(0);
     const TypeInstPtr *tr = phi->bottom_type()->isa_instptr();
-    if( tr && tr->klass()->is_loaded() &&
-        tr->klass()->is_interface() ) {
+    if (tr && tr->klass()->is_loaded() &&
+        tr->klass()->is_interface()) {
       const TypeInstPtr *tp = value->bottom_type()->isa_instptr();
       if (tp && tp->klass()->is_loaded() &&
           !tp->klass()->is_interface()) {
         // sharpen the type eagerly; this eases certain assert checking
         if (tp->higher_equal(TypeInstPtr::NOTNULL))
           tr = tr->join_speculative(TypeInstPtr::NOTNULL)->is_instptr();
-        value = _gvn.transform(new CheckCastPPNode(0,value,tr));
+        value = _gvn.transform(new CheckCastPPNode(0, value, tr));
+      }
+    } else {
+      // Also handle returns of oop-arrays to an arrays-of-interface return
+      const TypeInstPtr* phi_tip;
+      const TypeInstPtr* val_tip;
+      Type::get_arrays_base_elements(phi->bottom_type(), value->bottom_type(), &phi_tip, &val_tip);
+      if (phi_tip != NULL && phi_tip->is_loaded() && phi_tip->klass()->is_interface() &&
+          val_tip != NULL && val_tip->is_loaded() && !val_tip->klass()->is_interface()) {
+        value = _gvn.transform(new CheckCastPPNode(0, value, phi->bottom_type()));
       }
     }
     phi->add_req(value);

--- a/hotspot/src/share/vm/opto/parse2.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -136,8 +136,16 @@
       BoolTest::mask btest = BoolTest::lt;
       tst = _gvn.transform( new BoolNode(chk, btest) );
     }
+    RangeCheckNode* rc = new RangeCheckNode(control(), tst, PROB_MAX, COUNT_UNKNOWN);
+    _gvn.set_type(rc, rc->Value(&_gvn));
+    if (!tst->is_Con()) {
+      record_for_igvn(rc);
+    }
+    set_control(_gvn.transform(new IfTrueNode(rc)));
     // Branch to failure if out of bounds
-    { BuildCutout unless(this, tst, PROB_MAX);
+    {
+      PreserveJVMState pjvms(this);
+      set_control(_gvn.transform(new IfFalseNode(rc)));
       if (C->allow_range_check_smearing()) {
         // Do not use builtin_throw, since range checks are sometimes
         // made more stringent by an optimistic transformation.
@@ -940,13 +948,11 @@
 //-------------------------------repush_if_args--------------------------------
 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
 inline int Parse::repush_if_args() {
-#ifndef PRODUCT
   if (PrintOpto && WizardMode) {
     tty->print("defending against excessive implicit null exceptions on %s @%d in ",
                Bytecodes::name(iter().cur_bc()), iter().cur_bci());
     method()->print_name(); tty->cr();
   }
-#endif
   int bc_depth = - Bytecodes::depth(iter().cur_bc());
   assert(bc_depth == 1 || bc_depth == 2, "only two kinds of branches");
   DEBUG_ONLY(sync_jvms());   // argument(n) requires a synced jvms
@@ -967,10 +973,9 @@
   float prob = branch_prediction(cnt, btest, target_bci, c);
   if (prob == PROB_UNKNOWN) {
     // (An earlier version of do_ifnull omitted this trap for OSR methods.)
-#ifndef PRODUCT
-    if (PrintOpto && Verbose)
-      tty->print_cr("Never-taken edge stops compilation at bci %d",bci());
-#endif
+    if (PrintOpto && Verbose) {
+      tty->print_cr("Never-taken edge stops compilation at bci %d", bci());
+    }
     repush_if_args(); // to gather stats on loop
     // We need to mark this branch as taken so that if we recompile we will
     // see that it is possible. In the tiered system the interpreter doesn't
@@ -1049,10 +1054,9 @@
   float untaken_prob = 1.0 - prob;
 
   if (prob == PROB_UNKNOWN) {
-#ifndef PRODUCT
-    if (PrintOpto && Verbose)
-      tty->print_cr("Never-taken edge stops compilation at bci %d",bci());
-#endif
+    if (PrintOpto && Verbose) {
+      tty->print_cr("Never-taken edge stops compilation at bci %d", bci());
+    }
     repush_if_args(); // to gather stats on loop
     // We need to mark this branch as taken so that if we recompile we will
     // see that it is possible. In the tiered system the interpreter doesn't

--- a/hotspot/src/share/vm/opto/parse3.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/parse3.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -213,11 +213,9 @@
     // not need to mention the class index, since the class will
     // already have been loaded if we ever see a non-null value.)
     // uncommon_trap(iter().get_field_signature_index());
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name(); tty->print_cr(" asserting nullness of field at bci: %d", bci());
     }
-#endif
     if (C->log() != NULL) {
       C->log()->elem("assert_null reason='field' klass='%d'",
                      C->log()->identify(field->type()));
@@ -313,9 +311,8 @@
 
     // Preserve allocation ptr to create precedent edge to it in membar
     // generated on exit from constructor.
-    if (C->eliminate_boxing() &&
-        adr_type->isa_oopptr() && adr_type->is_oopptr()->is_ptr_to_boxed_value() &&
-        AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
+    // Can't bind stable with its allocation, only record allocation for final field.
+    if (field->is_final() && AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
       set_alloc_with_final(obj);
     }
   }

--- a/hotspot/src/share/vm/opto/phaseX.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/phaseX.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -521,7 +521,8 @@
   Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
   // Create a new if below new_entry for the predicate to be cloned
   ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                        Deoptimization::DeoptReason reason);
+                                        Deoptimization::DeoptReason reason,
+                                        int opcode);
 
   void remove_speculative_types();
   void check_no_speculative_types() {

--- a/hotspot/src/share/vm/opto/reg_split.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/reg_split.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -55,13 +55,15 @@
 // Get a SpillCopy node with wide-enough masks.  Use the 'wide-mask', the
 // wide ideal-register spill-mask if possible.  If the 'wide-mask' does
 // not cover the input (or output), use the input (or output) mask instead.
-Node *PhaseChaitin::get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx ) {
+Node *PhaseChaitin::get_spillcopy_wide(MachSpillCopyNode::SpillType spill_type, Node *def, Node *use, uint uidx) {
   // If ideal reg doesn't exist we've got a bad schedule happening
   // that is forcing us to spill something that isn't spillable.
   // Bail rather than abort
   int ireg = def->ideal_reg();
-  if( ireg == 0 || ireg == Op_RegFlags ) {
-    assert(false, "attempted to spill a non-spillable item");
+  if (ireg == 0 || ireg == Op_RegFlags) {
+    assert(false, "attempted to spill a non-spillable item: %d: %s <- %d: %s, ireg = %d, spill_type: %s",
+           def->_idx, def->Name(), use->_idx, use->Name(), ireg,
+           MachSpillCopyNode::spill_type(spill_type));
     C->record_method_not_compilable("attempted to spill a non-spillable item");
     return NULL;
   }
@@ -308,14 +310,16 @@
 
 //------------------------------split_Rematerialize----------------------------
 // Clone a local copy of the def.
-Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) {
+Node *PhaseChaitin::split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg,
+                                        GrowableArray<uint> splits, int slidx, uint *lrg2reach,
+                                        Node **Reachblock, bool walkThru) {
   // The input live ranges will be stretched to the site of the new
   // instruction.  They might be stretched past a def and will thus
   // have the old and new values of the same live range alive at the
   // same time - a definite no-no.  Split out private copies of
   // the inputs.
-  if( def->req() > 1 ) {
-    for( uint i = 1; i < def->req(); i++ ) {
+  if (def->req() > 1) {
+    for (uint i = 1; i < def->req(); i++) {
       Node *in = def->in(i);
       uint lidx = _lrg_map.live_range_id(in);
       // We do not need this for live ranges that are only defined once.
@@ -327,12 +331,29 @@
 
       Block *b_def = _cfg.get_block_for_node(def);
       int idx_def = b_def->find_node(def);
-      Node *in_spill = get_spillcopy_wide(MachSpillCopyNode::InputToRematerialization, in, def, i );
-      if( !in_spill ) return 0; // Bailed out
-      insert_proj(b_def,idx_def,in_spill,maxlrg++);
-      if( b_def == b )
-        insidx++;
-      def->set_req(i,in_spill);
+      // Cannot spill Op_RegFlags.
+      Node *in_spill;
+      if (in->ideal_reg() != Op_RegFlags) {
+        in_spill = get_spillcopy_wide(MachSpillCopyNode::InputToRematerialization, in, def, i);
+        if (!in_spill) { return 0; } // Bailed out
+        insert_proj(b_def, idx_def, in_spill, maxlrg++);
+        if (b_def == b) {
+          insidx++;
+        }
+        def->set_req(i, in_spill);
+      } else {
+        // The 'in' defines a flag register. Flag registers can not be spilled.
+        // Register allocation handles live ranges with flag registers
+        // by rematerializing the def (in this case 'in'). Thus, this is not
+        // critical if the input can be rematerialized, too.
+        if (!in->rematerialize()) {
+          assert(false, "Can not rematerialize %d: %s. Prolongs RegFlags live"
+                 " range and defining node %d: %s may not be rematerialized.",
+                 def->_idx, def->Name(), in->_idx, in->Name());
+          C->record_method_not_compilable("attempted to spill a non-spillable item with RegFlags input");
+          return 0; // Bailed out
+        }
+      }
     }
   }
 
@@ -506,10 +527,9 @@
       // Initialize the split counts to zero
       splits.append(0);
 #endif
-#ifndef PRODUCT
-      if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
+      if (PrintOpto && WizardMode && lrgs(bidx)._was_spilled1) {
         tty->print_cr("Warning, 2nd spill of L%d",bidx);
-#endif
+      }
     }
   }

--- a/hotspot/src/share/vm/opto/split_if.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/split_if.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -390,13 +390,13 @@
 // Found an If getting its condition-code input from a Phi in the same block.
 // Split thru the Region.
 void PhaseIdealLoop::do_split_if( Node *iff ) {
-#ifndef PRODUCT
-  if( PrintOpto && VerifyLoopOptimizations )
+  if (PrintOpto && VerifyLoopOptimizations) {
     tty->print_cr("Split-if");
+  }
   if (TraceLoopOpts) {
     tty->print_cr("SplitIf");
   }
-#endif
+
   C->set_major_progress();
   Node *region = iff->in(0);
   Node *region_dom = idom(region);

--- a/hotspot/src/share/vm/opto/stringopts.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/stringopts.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1293,7 +1293,7 @@
   Node* index = __ SubI(charPos, __ intcon((bt == T_BYTE) ? 1 : 2));
   Node* ch = __ AddI(r, __ intcon('0'));
   Node* st = __ store_to_memory(kit.control(), kit.array_element_address(dst_array, index, T_BYTE),
-                                ch, bt, byte_adr_idx, MemNode::unordered);
+                                ch, bt, byte_adr_idx, MemNode::unordered, (bt != T_BYTE) /* mismatched */);
 
   iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne),
                               PROB_FAIR, COUNT_UNKNOWN);
@@ -1331,7 +1331,7 @@
   } else {
     Node* index = __ SubI(charPos, __ intcon((bt == T_BYTE) ? 1 : 2));
     st = __ store_to_memory(kit.control(), kit.array_element_address(dst_array, index, T_BYTE),
-                            sign, bt, byte_adr_idx, MemNode::unordered);
+                            sign, bt, byte_adr_idx, MemNode::unordered, (bt != T_BYTE) /* mismatched */);
 
     final_merge->init_req(merge_index + 1, kit.control());
     final_mem->init_req(merge_index + 1, st);
@@ -1524,7 +1524,7 @@
       } else {
         val = readChar(src_array, i++);
       }
-      __ store(__ ctrl(), adr, __ ConI(val), T_CHAR, byte_adr_idx, MemNode::unordered);
+      __ store(__ ctrl(), adr, __ ConI(val), T_CHAR, byte_adr_idx, MemNode::unordered, true /* mismatched */);
       index = __ AddI(index, __ ConI(2));
     }
     if (src_is_byte) {
@@ -1612,7 +1612,7 @@
   }
   if (!dcon || !dbyte) {
     // Destination is UTF16. Store a char.
-    __ store(__ ctrl(), adr, val, T_CHAR, byte_adr_idx, MemNode::unordered);
+    __ store(__ ctrl(), adr, val, T_CHAR, byte_adr_idx, MemNode::unordered, true /* mismatched */);
     __ set(end, __ AddI(start, __ ConI(2)));
   }
   if (!dcon) {

--- a/hotspot/src/share/vm/opto/superword.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/superword.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -276,7 +276,9 @@
       // stop looking, we already have the max vector to map to.
       if (cur_max_vector < local_loop_unroll_factor) {
         is_slp = false;
-        NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"));
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
+        }
         break;
       }
 
@@ -389,11 +391,9 @@
 
   if (_do_vector_loop) {
     if (_packset.length() == 0) {
-#ifndef PRODUCT
       if (TraceSuperWord) {
         tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
       }
-#endif
       pack_parallel();
     }
   }
@@ -558,9 +558,11 @@
           assert(!same_velt_type(s, mem_ref), "sanity");
           memops.push(s);
         }
-        MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
+        best_align_to_mem_ref = find_align_to_ref(memops);
         if (best_align_to_mem_ref == NULL) {
-          NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");)
+          if (TraceSuperWord) {
+            tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");
+          }
           break;
         }
         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
@@ -582,12 +584,10 @@
   } // while (memops.size() != 0
   set_align_to_ref(best_align_to_mem_ref);
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter find_adjacent_refs");
     print_packset();
   }
-#endif
 }
 
 #ifndef PRODUCT
@@ -874,7 +874,7 @@
         _dg.make_edge(s1, slice_sink);
       }
     }
-#ifndef PRODUCT
+
     if (TraceSuperWord) {
       tty->print_cr("\nDependence graph for slice: %d", n->_idx);
       for (int q = 0; q < _nlist.length(); q++) {
@@ -882,11 +882,10 @@
       }
       tty->cr();
     }
-#endif
+
     _nlist.clear();
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
     for (int r = 0; r < _disjoint_ptrs.length(); r++) {
@@ -895,7 +894,7 @@
     }
     tty->cr();
   }
-#endif
+
 }
 
 //---------------------------mem_slice_preds---------------------------
@@ -912,7 +911,9 @@
       if (out->is_Load()) {
         if (in_bb(out)) {
           preds.push(out);
-          NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);)
+          if (TraceSuperWord && Verbose) {
+            tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);
+          }
         }
       } else {
         // FIXME
@@ -931,7 +932,9 @@
     }//for
     if (n == stop) break;
     preds.push(n);
-    NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
+    if (TraceSuperWord && Verbose) {
+      tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);
+    }
     prev = n;
     assert(n->is_Mem(), "unexpected node %s", n->Name());
     n = n->in(MemNode::Memory);
@@ -1123,12 +1126,10 @@
     }
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter extend_packlist");
     print_packset();
   }
-#endif
 }
 
 //------------------------------follow_use_defs---------------------------
@@ -1412,12 +1413,10 @@
     }
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter combine_packs");
     print_packset();
   }
-#endif
 }
 
 //-----------------------------construct_my_pack_map--------------------------
@@ -2244,10 +2243,15 @@
     if (cl->has_passed_slp()) {
       uint slp_max_unroll_factor = cl->slp_max_unroll();
       if (slp_max_unroll_factor == max_vlen) {
-        NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
+        }
         // For atomic unrolled loops which are vector mapped, instigate more unrolling.
         cl->set_notpassed_slp();
-        C->set_major_progress();
+        // if vector resources are limited, do not allow additional unrolling
+        if (FLOATPRESSURE > 8) {
+          C->set_major_progress();
+        }
         cl->mark_do_unroll_only();
       }
     }
@@ -2650,10 +2654,10 @@
     }
     ct++;
   } while (again);
-#ifndef PRODUCT
-  if (TraceSuperWord && Verbose)
+
+  if (TraceSuperWord && Verbose) {
     tty->print_cr("compute_max_depth iterated: %d times", ct);
-#endif
+  }
 }
 
 //-------------------------compute_vector_element_type-----------------------
@@ -2664,10 +2668,9 @@
 // Normally the type of the add is integer, but for packed character
 // operations the type of the add needs to be char.
 void SuperWord::compute_vector_element_type() {
-#ifndef PRODUCT
-  if (TraceSuperWord && Verbose)
+  if (TraceSuperWord && Verbose) {
     tty->print_cr("\ncompute_velt_type:");
-#endif
+  }
 
   // Initial type
   for (int i = 0; i < _block.length(); i++) {
@@ -2758,7 +2761,9 @@
   offset     += iv_adjust*p.memory_size();
   int off_rem = offset % vw;
   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
-  NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);)
+  if (TraceSuperWord && Verbose) {
+    tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
+  }
   return off_mod;
 }
 
@@ -4046,11 +4051,9 @@
   }//for (int i...
 
   if (_ii_first == -1 || _ii_last == -1) {
-#ifndef PRODUCT
     if (TraceSuperWord && Verbose) {
       tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong");
     }
-#endif
     return -1; // something vent wrong
   }
   // collect nodes in the first and last generations
@@ -4083,11 +4086,9 @@
       }//for
 
       if (found == false) {
-#ifndef PRODUCT
         if (TraceSuperWord && Verbose) {
           tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx);
         }
-#endif
         _ii_order.clear();
         return -1;
       }
@@ -4153,11 +4154,10 @@
     return true;
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord && Verbose) {
     tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx);
   }
-#endif
+
   return false;
 }
 
@@ -4224,11 +4224,9 @@
   for (int i = 0; i < _mem_slice_head.length(); i++) {
     Node* n = _mem_slice_head.at(i);
     if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) {
-#ifndef PRODUCT
       if (TraceSuperWord && Verbose) {
         tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx);
       }
-#endif
       continue;
     }
 
@@ -4275,11 +4273,10 @@
 
   restart(); // invalidate all basic structures, since we rebuilt the graph
 
-#ifndef PRODUCT
   if (TraceSuperWord && Verbose) {
     tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild");
   }
-#endif
+
   return true;
 }

--- a/hotspot/src/share/vm/opto/type.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/type.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -150,6 +150,33 @@
   return bt;
 }
 
+// For two instance arrays of same dimension, return the base element types.
+// Otherwise or if the arrays have different dimensions, return NULL.
+void Type::get_arrays_base_elements(const Type *a1, const Type *a2,
+                                    const TypeInstPtr **e1, const TypeInstPtr **e2) {
+
+  if (e1) *e1 = NULL;
+  if (e2) *e2 = NULL;
+  const TypeAryPtr* a1tap = (a1 == NULL) ? NULL : a1->isa_aryptr();
+  const TypeAryPtr* a2tap = (a2 == NULL) ? NULL : a2->isa_aryptr();
+
+  if (a1tap != NULL && a2tap != NULL) {
+    // Handle multidimensional arrays
+    const TypePtr* a1tp = a1tap->elem()->make_ptr();
+    const TypePtr* a2tp = a2tap->elem()->make_ptr();
+    while (a1tp && a1tp->isa_aryptr() && a2tp && a2tp->isa_aryptr()) {
+      a1tap = a1tp->is_aryptr();
+      a2tap = a2tp->is_aryptr();
+      a1tp = a1tap->elem()->make_ptr();
+      a2tp = a2tap->elem()->make_ptr();
+    }
+    if (a1tp && a1tp->isa_instptr() && a2tp && a2tp->isa_instptr()) {
+      if (e1) *e1 = a1tp->is_instptr();
+      if (e2) *e2 = a2tp->is_instptr();
+    }
+  }
+}
+
 //---------------------------get_typeflow_type---------------------------------
 // Import a type produced by ciTypeFlow.
 const Type* Type::get_typeflow_type(ciType* type) {
@@ -2029,7 +2056,11 @@
 bool TypeAry::interface_vs_oop(const Type *t) const {
   const TypeAry* t_ary = t->is_ary();
   if (t_ary) {
-    return _elem->interface_vs_oop(t_ary->_elem);
+    const TypePtr* this_ptr = _elem->make_ptr(); // In case we have narrow_oops
+    const TypePtr*    t_ptr = t_ary->_elem->make_ptr();
+    if(this_ptr != NULL && t_ptr != NULL) {
+      return this_ptr->interface_vs_oop(t_ptr);
+    }
   }
   return false;
 }
@@ -3134,8 +3165,17 @@
     // be 'I' or 'j/l/O'.  Thus we'll pick 'j/l/O'.  If this then flows
     // into a Phi which "knows" it's an Interface type we'll have to
     // uplift the type.
-    if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface())
-      return kills;             // Uplift to interface
+    if (!empty()) {
+      if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) {
+        return kills;           // Uplift to interface
+      }
+      // Also check for evil cases of 'this' being a class array
+      // and 'kills' expecting an array of interfaces.
+      Type::get_arrays_base_elements(ft, kills, NULL, &ktip);
+      if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) {
+        return kills;           // Uplift to array of interface
+      }
+    }
 
     return Type::TOP;           // Canonical empty value
   }

--- a/hotspot/src/share/vm/opto/type.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/type.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -368,6 +368,11 @@
     return _const_basic_type[type];
   }
 
+  // For two instance arrays of same dimension, return the base element types.
+  // Otherwise or if the arrays have different dimensions, return NULL.
+  static void get_arrays_base_elements(const Type *a1, const Type *a2,
+                                       const TypeInstPtr **e1, const TypeInstPtr **e2);
+
   // Mapping to the array element's basic type.
   BasicType array_element_basic_type() const;

--- a/hotspot/src/share/vm/opto/vectornode.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/opto/vectornode.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -188,7 +188,7 @@
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = VectorNode::opcode(opc, bt);
-    return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4);
+    return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
   }
   return false;
 }

--- a/hotspot/src/share/vm/prims/jni.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/prims/jni.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -3878,7 +3878,7 @@
   unit_test_function_call
 
 // Forward declaration
-void TestNmethodBucket_test();
+void TestDependencyContext_test();
 void test_semaphore();
 void TestOS_test();
 void TestReservedSpace_test();
@@ -3902,7 +3902,7 @@
 void TestBufferingOopClosure_test();
 void TestCodeCacheRemSet_test();
 void FreeRegionList_test();
-void test_memset_with_concurrent_readers();
+void test_memset_with_concurrent_readers() NOT_DEBUG_RETURN;
 void TestPredictions_test();
 void WorkerDataArray_test();
 #endif
@@ -3910,7 +3910,7 @@
 void execute_internal_vm_tests() {
   if (ExecuteInternalVMTests) {
     tty->print_cr("Running internal VM tests");
-    run_unit_test(TestNmethodBucket_test());
+    run_unit_test(TestDependencyContext_test());
     run_unit_test(test_semaphore());
     run_unit_test(TestOS_test());
     run_unit_test(TestReservedSpace_test());

--- a/hotspot/src/share/vm/prims/methodHandles.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/prims/methodHandles.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -27,6 +27,7 @@
 #include "classfile/stringTable.hpp"
 #include "code/codeCache.hpp"
 #include "code/codeCacheExtensions.hpp"
+#include "code/dependencyContext.hpp"
 #include "compiler/compileBroker.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/oopMapCache.hpp"
@@ -945,30 +946,33 @@
   return rfill + overflow;
 }
 
+// Is it safe to remove stale entries from a dependency list?
+static bool safe_to_expunge() {
+  // Since parallel GC threads can concurrently iterate over a dependency
+  // list during safepoint, it is safe to remove entries only when
+  // CodeCache lock is held.
+  return CodeCache_lock->owned_by_self();
+}
+
 void MethodHandles::add_dependent_nmethod(oop call_site, nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
 
   oop context = java_lang_invoke_CallSite::context(call_site);
-  nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
-
-  nmethodBucket* new_deps = nmethodBucket::add_dependent_nmethod(deps, nm);
-  if (deps != new_deps) {
-    java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps);
-  }
+  DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
+  // Try to purge stale entries on updates.
+  // Since GC doesn't clean dependency contexts rooted at CallSiteContext objects,
+  // in order to avoid memory leak, stale entries are purged whenever a dependency list
+  // is changed (both on addition and removal). Though memory reclamation is delayed,
+  // it avoids indefinite memory usage growth.
+  deps.add_dependent_nmethod(nm, /*expunge_stale_entries=*/safe_to_expunge());
 }
 
 void MethodHandles::remove_dependent_nmethod(oop call_site, nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
 
   oop context = java_lang_invoke_CallSite::context(call_site);
-  nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
-
-  if (nmethodBucket::remove_dependent_nmethod(deps, nm)) {
-    nmethodBucket* new_deps = nmethodBucket::clean_dependent_nmethods(deps);
-    if (deps != new_deps) {
-      java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps);
-    }
-  }
+  DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
+  deps.remove_dependent_nmethod(nm, /*expunge_stale_entries=*/safe_to_expunge());
 }
 
 void MethodHandles::flush_dependent_nmethods(Handle call_site, Handle target) {
@@ -977,21 +981,15 @@
   int marked = 0;
   CallSiteDepChange changes(call_site(), target());
   {
+    No_Safepoint_Verifier nsv;
     MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag);
 
     oop context = java_lang_invoke_CallSite::context(call_site());
-    nmethodBucket* deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
-
-    marked = nmethodBucket::mark_dependent_nmethods(deps, changes);
-    if (marked > 0) {
-      nmethodBucket* new_deps = nmethodBucket::clean_dependent_nmethods(deps);
-      if (deps != new_deps) {
-        java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context, new_deps);
-      }
-    }
+    DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context);
+    marked = deps.mark_dependent_nmethods(changes);
   }
   if (marked > 0) {
-    // At least one nmethod has been marked for deoptimization
+    // At least one nmethod has been marked for deoptimization.
     VM_Deoptimize op;
     VMThread::execute(&op);
   }
@@ -1331,6 +1329,8 @@
 }
 JVM_END
 
+// It is called by a Cleaner object which ensures that dropped CallSites properly
+// deallocate their dependency information.
 JVM_ENTRY(void, MHN_clearCallSiteContext(JNIEnv* env, jobject igcls, jobject context_jh)) {
   Handle context(THREAD, JNIHandles::resolve_non_null(context_jh));
   {
@@ -1339,19 +1339,11 @@
 
     int marked = 0;
     {
+      No_Safepoint_Verifier nsv;
       MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-      nmethodBucket* b = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context());
-      while(b != NULL) {
-        nmethod* nm = b->get_nmethod();
-        if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization()) {
-          nm->mark_for_deoptimization();
-          marked++;
-        }
-        nmethodBucket* next = b->next();
-        delete b;
-        b = next;
-      }
-      java_lang_invoke_MethodHandleNatives_CallSiteContext::set_vmdependencies(context(), NULL); // reset context
+      assert(safe_to_expunge(), "removal is not safe");
+      DependencyContext deps = java_lang_invoke_MethodHandleNatives_CallSiteContext::vmdependencies(context());
+      marked = deps.remove_all_dependents();
     }
     if (marked > 0) {
       // At least one nmethod has been marked for deoptimization

--- a/hotspot/src/share/vm/runtime/arguments.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -459,7 +459,7 @@
   return flag_name;
 }
 
-#ifndef PRODUCT
+#ifdef ASSERT
 static bool lookup_special_flag(const char *flag_name, size_t skip_index) {
   for (size_t i = 0; special_jvm_flags[i].name != NULL; i++) {
     if ((i != skip_index) && (strcmp(special_jvm_flags[i].name, flag_name) == 0)) {
@@ -1468,24 +1468,6 @@
   // Enable SegmentedCodeCache if TieredCompilation is enabled and ReservedCodeCacheSize >= 240M
   if (FLAG_IS_DEFAULT(SegmentedCodeCache) && ReservedCodeCacheSize >= 240*M) {
     FLAG_SET_ERGO(bool, SegmentedCodeCache, true);
-
-    if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) {
-      // Multiply sizes by 5 but fix NonNMethodCodeHeapSize (distribute among non-profiled and profiled code heap)
-      if (FLAG_IS_DEFAULT(ProfiledCodeHeapSize)) {
-        FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, ProfiledCodeHeapSize * 5 + NonNMethodCodeHeapSize * 2);
-      }
-      if (FLAG_IS_DEFAULT(NonProfiledCodeHeapSize)) {
-        FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, NonProfiledCodeHeapSize * 5 + NonNMethodCodeHeapSize * 2);
-      }
-      // Check consistency of code heap sizes
-      if ((NonNMethodCodeHeapSize + NonProfiledCodeHeapSize + ProfiledCodeHeapSize) != ReservedCodeCacheSize) {
-        jio_fprintf(defaultStream::error_stream(),
-                    "Invalid code heap sizes: NonNMethodCodeHeapSize(%dK) + ProfiledCodeHeapSize(%dK) + NonProfiledCodeHeapSize(%dK) = %dK. Must be equal to ReservedCodeCacheSize = %uK.\n",
-                    NonNMethodCodeHeapSize/K, ProfiledCodeHeapSize/K, NonProfiledCodeHeapSize/K,
-                    (NonNMethodCodeHeapSize + ProfiledCodeHeapSize + NonProfiledCodeHeapSize)/K, ReservedCodeCacheSize/K);
-        vm_exit(1);
-      }
-    }
   }
   if (!UseInterpreter) { // -Xcomp
     Tier3InvokeNotifyFreqLog = 0;
@@ -2535,18 +2517,11 @@
                 "Invalid ReservedCodeCacheSize=%dM. Must be at most %uM.\n", ReservedCodeCacheSize/M,
                 CODE_CACHE_SIZE_LIMIT/M);
     status = false;
-  } else if (NonNMethodCodeHeapSize < min_code_cache_size){
+  } else if (NonNMethodCodeHeapSize < min_code_cache_size) {
     jio_fprintf(defaultStream::error_stream(),
                 "Invalid NonNMethodCodeHeapSize=%dK. Must be at least %uK.\n", NonNMethodCodeHeapSize/K,
                 min_code_cache_size/K);
     status = false;
-  } else if ((!FLAG_IS_DEFAULT(NonNMethodCodeHeapSize) || !FLAG_IS_DEFAULT(ProfiledCodeHeapSize) || !FLAG_IS_DEFAULT(NonProfiledCodeHeapSize))
-             && (NonNMethodCodeHeapSize + NonProfiledCodeHeapSize + ProfiledCodeHeapSize) != ReservedCodeCacheSize) {
-    jio_fprintf(defaultStream::error_stream(),
-                "Invalid code heap sizes: NonNMethodCodeHeapSize(%dK) + ProfiledCodeHeapSize(%dK) + NonProfiledCodeHeapSize(%dK) = %dK. Must be equal to ReservedCodeCacheSize = %uK.\n",
-                NonNMethodCodeHeapSize/K, ProfiledCodeHeapSize/K, NonProfiledCodeHeapSize/K,
-                (NonNMethodCodeHeapSize + ProfiledCodeHeapSize + NonProfiledCodeHeapSize)/K, ReservedCodeCacheSize/K);
-    status = false;
   }
 
   if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {

--- a/hotspot/src/share/vm/runtime/deoptimization.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/deoptimization.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -68,7 +68,8 @@
                                          int  number_of_frames,
                                          intptr_t* frame_sizes,
                                          address* frame_pcs,
-                                         BasicType return_type) {
+                                         BasicType return_type,
+                                         int exec_mode) {
   _size_of_deoptimized_frame = size_of_deoptimized_frame;
   _caller_adjustment         = caller_adjustment;
   _caller_actual_parameters  = caller_actual_parameters;
@@ -80,10 +81,11 @@
   _initial_info              = 0;
   // PD (x86 only)
   _counter_temp              = 0;
-  _unpack_kind               = 0;
+  _unpack_kind               = exec_mode;
   _sender_sp_temp            = 0;
 
   _total_frame_sizes         = size_of_frames();
+  assert(exec_mode >= 0 && exec_mode < Unpack_LIMIT, "Unexpected exec_mode");
 }
 
 
@@ -128,7 +130,7 @@
 // ResetNoHandleMark and HandleMark were removed from it. The actual reallocation
 // of previously eliminated objects occurs in realloc_objects, which is
 // called from the method fetch_unroll_info_helper below.
-JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread))
+JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread, int exec_mode))
   // It is actually ok to allocate handles in a leaf method. It causes no safepoints,
   // but makes the entry a little slower. There is however a little dance we have to
   // do in debug mode to get around the NoHandleMark code in the JRT_LEAF macro
@@ -142,12 +144,12 @@
   }
   thread->inc_in_deopt_handler();
 
-  return fetch_unroll_info_helper(thread);
+  return fetch_unroll_info_helper(thread, exec_mode);
 JRT_END
 
 
 // This is factored, since it is both called from a JRT_LEAF (deoptimization) and a JRT_ENTRY (uncommon_trap)
-Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread) {
+Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread, int exec_mode) {
 
   // Note: there is a safepoint safety issue here. No matter whether we enter
   // via vanilla deopt or uncommon trap we MUST NOT stop at a safepoint once
@@ -186,6 +188,19 @@
   assert(vf->is_compiled_frame(), "Wrong frame type");
   chunk->push(compiledVFrame::cast(vf));
 
+  ScopeDesc* trap_scope = chunk->at(0)->scope();
+  Handle exceptionObject;
+  if (trap_scope->rethrow_exception()) {
+    if (PrintDeoptimizationDetails) {
+      tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_scope->method()->method_holder()->name()->as_C_string(), trap_scope->method()->name()->as_C_string(), trap_scope->bci());
+    }
+    GrowableArray<ScopeValue*>* expressions = trap_scope->expressions();
+    guarantee(expressions != NULL && expressions->length() > 0, "must have exception to throw");
+    ScopeValue* topOfStack = expressions->top();
+    exceptionObject = StackValue::create_stack_value(&deoptee, &map, topOfStack)->get_obj();
+    assert(exceptionObject() != NULL, "exception oop can not be null");
+  }
+
   bool realloc_failures = false;
 
 #if defined(COMPILER2) || INCLUDE_JVMCI
@@ -474,13 +489,21 @@
   assert(CodeCache::find_blob_unsafe(frame_pcs[0]) != NULL, "bad pc");
 #endif // SHARK
 
+#ifdef INCLUDE_JVMCI
+  if (exceptionObject() != NULL) {
+    thread->set_exception_oop(exceptionObject());
+    exec_mode = Unpack_exception;
+  }
+#endif
+
   UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
                                       caller_adjustment * BytesPerWord,
                                       caller_was_method_handle ? 0 : callee_parameters,
                                       number_of_frames,
                                       frame_sizes,
                                       frame_pcs,
-                                      return_type);
+                                      return_type,
+                                      exec_mode);
   // On some platforms, we need a way to pass some platform dependent
   // information to the unpacking code so the skeletal frames come out
   // correct (initial fp value, unextended sp, ...)
@@ -1495,18 +1518,6 @@
 #endif
 
     Bytecodes::Code trap_bc     = trap_method->java_code_at(trap_bci);
-
-    if (trap_scope->rethrow_exception()) {
-      if (PrintDeoptimizationDetails) {
-        tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_method->method_holder()->name()->as_C_string(), trap_method->name()->as_C_string(), trap_bci);
-      }
-      GrowableArray<ScopeValue*>* expressions = trap_scope->expressions();
-      guarantee(expressions != NULL, "must have exception to throw");
-      ScopeValue* topOfStack = expressions->top();
-      Handle topOfStackObj = StackValue::create_stack_value(&fr, &reg_map, topOfStack)->get_obj();
-      THREAD->set_pending_exception(topOfStackObj(), NULL, 0);
-    }
-
     // Record this event in the histogram.
     gather_statistics(reason, action, trap_bc);
 
@@ -1985,7 +1996,7 @@
                            ignore_maybe_prior_recompile);
 }
 
-Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request) {
+Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request, jint exec_mode) {
   if (TraceDeoptimization) {
     tty->print("Uncommon trap ");
   }
@@ -1994,7 +2005,7 @@
     // This enters VM and may safepoint
     uncommon_trap_inner(thread, trap_request);
   }
-  return fetch_unroll_info_helper(thread);
+  return fetch_unroll_info_helper(thread, exec_mode);
 }
 
 // Local derived constants.

--- a/hotspot/src/share/vm/runtime/deoptimization.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/deoptimization.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -123,7 +123,8 @@
     Unpack_deopt                = 0, // normal deoptimization, use pc computed in unpack_vframe_on_stack
     Unpack_exception            = 1, // exception is pending
     Unpack_uncommon_trap        = 2, // redo last byte code (C2 only)
-    Unpack_reexecute            = 3  // reexecute bytecode (C1 only)
+    Unpack_reexecute            = 3, // reexecute bytecode (C1 only)
+    Unpack_LIMIT                = 4
   };
 
   // Checks all compiled methods. Invalid methods are deleted and
@@ -179,13 +180,13 @@
     intptr_t  _initial_info;              // Platform dependent data for the sender frame (was FP on x86)
     int       _caller_actual_parameters;  // The number of actual arguments at the
                                           // interpreted caller of the deoptimized frame
+    int       _unpack_kind;               // exec_mode that can be changed during fetch_unroll_info
 
     // The following fields are used as temps during the unpacking phase
     // (which is tight on registers, especially on x86). They really ought
     // to be PD variables but that involves moving this class into its own
     // file to use the pd include mechanism. Maybe in a later cleanup ...
     intptr_t  _counter_temp;              // SHOULD BE PD VARIABLE (x86 frame count temp)
-    intptr_t  _unpack_kind;               // SHOULD BE PD VARIABLE (x86 unpack kind)
     intptr_t  _sender_sp_temp;            // SHOULD BE PD VARIABLE (x86 sender_sp)
    public:
     // Constructor
@@ -195,7 +196,8 @@
                 int  number_of_frames,
                 intptr_t* frame_sizes,
                 address* frames_pcs,
-                BasicType return_type);
+                BasicType return_type,
+                int unpack_kind);
     ~UnrollBlock();
 
     // Returns where a register is located.
@@ -205,6 +207,7 @@
     intptr_t* frame_sizes()  const { return _frame_sizes; }
     int number_of_frames()  const { return _number_of_frames; }
     address*  frame_pcs()   const { return _frame_pcs ; }
+    int  unpack_kind()   const { return _unpack_kind; }
 
     // Returns the total size of frames
     int size_of_frames() const;
@@ -237,7 +240,7 @@
   // deoptimized frame.
   // @argument thread.     Thread where stub_frame resides.
   // @see OptoRuntime::deoptimization_fetch_unroll_info_C
-  static UnrollBlock* fetch_unroll_info(JavaThread* thread);
+  static UnrollBlock* fetch_unroll_info(JavaThread* thread, int exec_mode);
 
   //** Unpacks vframeArray onto execution stack
   // Called by assembly stub after execution has returned to
@@ -262,7 +265,7 @@
 
   //** Performs an uncommon trap for compiled code.
   // The top most compiler frame is converted into interpreter frames
-  static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
+  static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode);
   // Helper routine that enters the VM and may block
   static void uncommon_trap_inner(JavaThread* thread, jint unloaded_class_index);
 
@@ -423,7 +426,7 @@
   static void load_class_by_index(const constantPoolHandle& constant_pool, int index, TRAPS);
   static void load_class_by_index(const constantPoolHandle& constant_pool, int index);
 
-  static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread);
+  static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread, int exec_mode);
 
   static DeoptAction _unloaded_action; // == Action_reinterpret;
   static const char* _trap_reason_name[];

--- a/hotspot/src/share/vm/runtime/globals.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/globals.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -528,59 +528,57 @@
 // 4991491 do not "optimize out" the was_set false values: omitting them
 // tickles a Microsoft compiler bug causing flagTable to be malformed
 
-#define NAME(name) NOT_PRODUCT(&name) PRODUCT_ONLY(&CONST_##name)
+#define RUNTIME_PRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) },
+#define RUNTIME_PD_PRODUCT_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) },
+#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) },
+#define RUNTIME_MANAGEABLE_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) },
+#define RUNTIME_PRODUCT_RW_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) },
+#define RUNTIME_DEVELOP_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) },
+#define RUNTIME_PD_DEVELOP_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define RUNTIME_NOTPRODUCT_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) },
 
-#define RUNTIME_PRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) },
-#define RUNTIME_PD_PRODUCT_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) },
-#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) },
-#define RUNTIME_MANAGEABLE_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) },
-#define RUNTIME_PRODUCT_RW_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) },
-#define RUNTIME_DEVELOP_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) },
-#define RUNTIME_PD_DEVELOP_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define RUNTIME_NOTPRODUCT_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) },
-
-#define JVMCI_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) },
-#define JVMCI_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define JVMCI_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) },
-#define JVMCI_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define JVMCI_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) },
-#define JVMCI_EXPERIMENTAL_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) },
-#define JVMCI_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) },
+#define JVMCI_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) },
+#define JVMCI_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define JVMCI_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) },
+#define JVMCI_EXPERIMENTAL_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) },
+#define JVMCI_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) },
+#define JVMCI_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define JVMCI_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) },
 
 #ifdef _LP64
-#define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) },
+#define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) },
 #else
 #define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) /* flag is constant */
 #endif // _LP64
 
-#define C1_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) },
-#define C1_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C1_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) },
-#define C1_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) },
-#define C1_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C1_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) },
+#define C1_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) },
+#define C1_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C1_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) },
+#define C1_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) },
+#define C1_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C1_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) },
 
-#define C2_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) },
-#define C2_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C2_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) },
-#define C2_EXPERIMENTAL_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) },
-#define C2_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) },
-#define C2_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C2_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) },
+#define C2_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) },
+#define C2_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C2_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) },
+#define C2_EXPERIMENTAL_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) },
+#define C2_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) },
+#define C2_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C2_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) },
 
-#define ARCH_PRODUCT_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) },
-#define ARCH_DIAGNOSTIC_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) },
-#define ARCH_EXPERIMENTAL_FLAG_STRUCT(   type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) },
-#define ARCH_DEVELOP_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) },
-#define ARCH_NOTPRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) },
+#define ARCH_PRODUCT_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) },
+#define ARCH_DIAGNOSTIC_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) },
+#define ARCH_EXPERIMENTAL_FLAG_STRUCT(   type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) },
+#define ARCH_DEVELOP_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) },
+#define ARCH_NOTPRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) },
 
-#define SHARK_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) },
-#define SHARK_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define SHARK_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) },
-#define SHARK_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) },
-#define SHARK_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define SHARK_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) },
+#define SHARK_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) },
+#define SHARK_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define SHARK_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) },
+#define SHARK_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) },
+#define SHARK_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define SHARK_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) },
 
 static Flag flagTable[] = {
  RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, \

--- a/hotspot/src/share/vm/runtime/globals.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -1126,9 +1126,6 @@
   diagnostic(bool, PrintNMethods, false,                                    \
           "Print assembly code for nmethods when generated")                \
                                                                             \
-  diagnostic(intx, PrintNMethodsAtLevel, -1,                                \
-          "Only print code for nmethods at the given compilation level")    \
-                                                                            \
   diagnostic(bool, PrintNativeNMethods, false,                              \
           "Print assembly code for native nmethods when generated")         \
                                                                             \
@@ -3554,7 +3551,7 @@
                                                                             \
   product_pd(intx, CompilerThreadStackSize,                                 \
           "Compiler Thread Stack Size (in Kbytes)")                         \
-          range(0, max_intx)                                                \
+          range(0, max_intx /(1 * K))                                       \
                                                                             \
   develop_pd(size_t, JVMInvokeMethodSlack,                                  \
           "Stack space (bytes) required for JVM_InvokeMethod to complete")  \
@@ -4283,9 +4280,9 @@
 #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc)   extern "C" type name;
 #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc)   extern "C" type name;
 #ifdef PRODUCT
-#define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    extern "C" type CONST_##name; const type name = value;
-#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        extern "C" type CONST_##name; const type name = pd_##name;
-#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc)   extern "C" type CONST_##name;
+#define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    const type name = value;
+#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        const type name = pd_##name;
+#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc)   const type name = value;
 #else
 #define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    extern "C" type name;
 #define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        extern "C" type name;
@@ -4306,9 +4303,9 @@
 #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc)   type name = value;
 #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc)   type name = value;
 #ifdef PRODUCT
-#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)    type CONST_##name = value;
-#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)        type CONST_##name = pd_##name;
-#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc)   type CONST_##name = value;
+#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)
+#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)
+#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc)
 #else
 #define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)    type name = value;
 #define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)        type name = pd_##name;

--- a/hotspot/src/share/vm/runtime/init.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/init.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -72,6 +72,7 @@
 void InlineCacheBuffer_init();
 void compilerOracle_init();
 bool compileBroker_init();
+void dependencyContext_init();
 
 // Initialization after compiler initialization
 bool universe_post_init();  // must happen after compiler_init
@@ -131,6 +132,8 @@
   vtableStubs_init();
   InlineCacheBuffer_init();
   compilerOracle_init();
+  dependencyContext_init();
+
   if (!compileBroker_init()) {
     return JNI_EINVAL;
   }

--- a/hotspot/src/share/vm/runtime/perfData.hpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/perfData.hpp	Tue Nov 24 10:30:23 2015 +0100
@@ -424,6 +424,7 @@
   public:
     inline void inc() { (*(jlong*)_valuep)++; }
     inline void inc(jlong val) { (*(jlong*)_valuep) += val; }
+    inline void dec(jlong val) { inc(-val); }
     inline void add(jlong val) { (*(jlong*)_valuep) += val; }
     void clear_sample_helper() { _sample_helper = NULL; }
 };

--- a/hotspot/src/share/vm/runtime/sweeper.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/sweeper.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -297,7 +297,7 @@
 void NMethodSweeper::handle_safepoint_request() {
   if (SafepointSynchronize::is_synchronizing()) {
     if (PrintMethodFlushing && Verbose) {
-      tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nof_nmethods());
+      tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nmethod_count());
     }
     MutexUnlockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
 
@@ -401,7 +401,7 @@
   int flushed_c2_count     = 0;
 
   if (PrintMethodFlushing && Verbose) {
-    tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nof_nmethods());
+    tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nmethod_count());
   }
 
   int swept_count = 0;

--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -343,10 +343,6 @@
   nonstatic_field(InstanceKlass,               _methods_jmethod_ids,                          jmethodID*)                            \
   volatile_nonstatic_field(InstanceKlass,      _idnum_allocated_count,                        u2)                                    \
   nonstatic_field(InstanceKlass,               _annotations,                                  Annotations*)                          \
-  nonstatic_field(InstanceKlass,               _dependencies,                                 nmethodBucket*)                        \
-  nonstatic_field(nmethodBucket,               _nmethod,                                      nmethod*)                              \
-  nonstatic_field(nmethodBucket,               _count,                                        int)                                   \
-  nonstatic_field(nmethodBucket,               _next,                                         nmethodBucket*)                        \
   nonstatic_field(InstanceKlass,               _method_ordering,                              Array<int>*)                           \
   nonstatic_field(InstanceKlass,               _default_vtable_indices,                       Array<int>*)                           \
   nonstatic_field(Klass,                       _super_check_offset,                           juint)                                 \
@@ -969,6 +965,7 @@
   nonstatic_field(Deoptimization::UnrollBlock, _caller_adjustment,                            int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _number_of_frames,                             int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _total_frame_sizes,                            int)                                   \
+  nonstatic_field(Deoptimization::UnrollBlock, _unpack_kind,                                  int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _frame_sizes,                                  intptr_t*)                             \
   nonstatic_field(Deoptimization::UnrollBlock, _frame_pcs,                                    address*)                              \
   nonstatic_field(Deoptimization::UnrollBlock, _register_block,                               intptr_t*)                             \
@@ -1550,7 +1547,6 @@
   declare_toplevel_type(volatile Metadata*)                               \
                                                                           \
   declare_toplevel_type(DataLayout)                                       \
-  declare_toplevel_type(nmethodBucket)                                    \
                                                                           \
   /********/                                                              \
   /* Oops */                                                              \

--- a/hotspot/src/share/vm/shark/sharkRuntime.cpp	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/src/share/vm/shark/sharkRuntime.cpp	Tue Nov 24 10:30:23 2015 +0100
@@ -213,8 +213,9 @@
   // Initiate the trap
   thread->set_last_Java_frame();
   Deoptimization::UnrollBlock *urb =
-    Deoptimization::uncommon_trap(thread, trap_request);
+    Deoptimization::uncommon_trap(thread, trap_request, Deoptimization::Unpack_uncommon_trap);
   thread->reset_last_Java_frame();
+  assert(urb->unpack_kind() == Deoptimization::Unpack_uncommon_trap, "expected Unpack_uncommon_trap");
 
   // Pop our dummy frame and the frame being deoptimized
   thread->pop_zero_frame();

--- a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8072016
  * @summary Infinite deoptimization/recompilation cycles in case of arraycopy with tightly coupled allocation
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TestArrayCopyNoInitDeopt
@@ -42,6 +42,7 @@
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
 import java.lang.reflect.*;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestArrayCopyNoInitDeopt {

--- a/hotspot/test/compiler/floatingpoint/TestPow2.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/floatingpoint/TestPow2.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8063086
  * @summary X^2 special case for C2 yields different result than interpreter
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build TestPow2
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
@@ -36,6 +36,7 @@
 
 import java.lang.reflect.*;
 import sun.hotspot.WhiteBox;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestPow2 {

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/inlining/InlineAccessors.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+/**
+ * @test
+ * @bug 8140650
+ * @summary Method::is_accessor should cover getters and setters for all types
+ * @library /testlibrary
+ * @run main/othervm InlineAccessors
+ */
+import java.lang.invoke.*;
+import jdk.test.lib.*;
+import static jdk.test.lib.Asserts.*;
+
+public class InlineAccessors {
+    public static void main(String[] args) throws Exception {
+        // try some sanity checks first
+        doTest();
+
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                "-XX:+IgnoreUnrecognizedVMOptions", "-showversion",
+                "-server", "-XX:-TieredCompilation", "-Xbatch", "-Xcomp",
+                "-XX:+PrintCompilation", "-XX:+UnlockDiagnosticVMOptions", "-XX:+PrintInlining",
+                    "InlineAccessors$Launcher");
+
+        OutputAnalyzer analyzer = new OutputAnalyzer(pb.start());
+
+        analyzer.shouldHaveExitValue(0);
+
+        // The test is applicable only to C2 (present in Server VM).
+        if (analyzer.getStderr().contains("Server VM")) {
+            analyzer.shouldContain("InlineAccessors::setBool (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setByte (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setChar (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setShort (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setInt (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setFloat (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setLong (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setDouble (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setObject (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setArray (6 bytes)   accessor");
+
+            analyzer.shouldContain("InlineAccessors::getBool (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getByte (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getChar (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getShort (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getInt (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getFloat (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getLong (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getDouble (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getObject (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getArray (5 bytes)   accessor");
+        }
+    }
+
+    boolean bool;
+    byte b;
+    char c;
+    short s;
+    int i;
+    float f;
+    long l;
+    double d;
+    Object o;
+    Object[] a;
+
+    public void setBool(boolean v)   { bool = v; }
+    public void setByte(byte v)      { b = v; }
+    public void setChar(char v)      { c = v; }
+    public void setShort(short v)    { s = v; }
+    public void setInt(int v)        { i = v; }
+    public void setFloat(float v)    { f = v; }
+    public void setLong(long v)      { l = v; }
+    public void setDouble(double v)  { d = v; }
+    public void setObject(Object v)  { o = v; }
+    public void setArray(Object[] v) { a = v; }
+
+    public boolean  getBool()        { return bool; }
+    public byte     getByte()        { return b; }
+    public char     getChar()        { return c; }
+    public short    getShort()       { return s; }
+    public int      getInt()         { return i; }
+    public float    getFloat()       { return f; }
+    public long     getLong()        { return l; }
+    public double   getDouble()      { return d; }
+    public Object   getObject()      { return o; }
+    public Object[] getArray()       { return a; }
+
+    static void doTest() {
+        InlineAccessors o = new InlineAccessors();
+        o.setBool(false);
+        o.setByte((byte)0);
+        o.setChar('a');
+        o.setShort((short)0);
+        o.setInt(0);
+        o.setFloat(0F);
+        o.setLong(0L);
+        o.setDouble(0D);
+        o.setObject(new Object());
+        o.setArray(new Object[1]);
+
+        o.getBool();
+        o.getByte();
+        o.getChar();
+        o.getShort();
+        o.getInt();
+        o.getFloat();
+        o.getLong();
+        o.getDouble();
+        o.getObject();
+        o.getArray();
+    }
+
+    static class Launcher {
+        public static void main(String[] args) throws Exception {
+            for (int c = 0; c < 20_000; c++) {
+              doTest();
+            }
+        }
+    }
+}

--- a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,10 +23,11 @@
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
 import java.util.Objects;
+import compiler.whitebox.CompilerWhiteBoxTest;
 /*
  * @test
  * @bug 8130832
- * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary /
  * @build IntrinsicAvailableTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission

--- a/hotspot/test/compiler/intrinsics/bmi/TestAndnI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/TestAndnI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -61,15 +61,27 @@
         }
 
         public int intExpr(int src1, Expr.MemI src2) {
-            return ~src1 & src2.value;
+            if (src2 != null) {
+                return ~src1 & src2.value;
+            } else {
+                return 0;
+            }
         }
 
         public int intExpr(Expr.MemI src1, int src2) {
-            return ~src1.value & src2;
+            if (src1 != null) {
+                return ~src1.value & src2;
+            } else {
+                return 0;
+            }
         }
 
         public int intExpr(Expr.MemI src1, Expr.MemI src2) {
-            return ~src1.value & src2.value;
+            if (src1 != null && src2 != null) {
+                return ~src1.value & src2.value;
+            } else {
+                return 0;
+            }
         }
     }
 
@@ -80,15 +92,27 @@
         }
 
         public int intExpr(int src1, Expr.MemI src2) {
-            return src1 & ~src2.value;
+            if (src2 != null) {
+                return src1 & ~src2.value;
+            } else {
+                return 0;
+            }
         }
 
         public int intExpr(Expr.MemI src1, int src2) {
-            return src1.value & ~src2;
+            if (src1 != null) {
+                return src1.value & ~src2;
+            } else {
+                return 0;
+            }
         }
 
         public int intExpr(Expr.MemI src1, Expr.MemI src2) {
-            return src1.value & ~src2.value;
+            if (src1 != null && src2 != null) {
+                return src1.value & ~src2.value;
+            } else {
+                return 0;
+            }
         }
     }
 }

--- a/hotspot/test/compiler/intrinsics/bmi/TestAndnL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/TestAndnL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -61,15 +61,27 @@
         }
 
         public long longExpr(long src1, Expr.MemL src2) {
-            return ~src1 & src2.value;
+            if (src2 != null) {
+                return ~src1 & src2.value;
+            } else {
+                return 0;
+            }
         }
 
         public long longExpr(Expr.MemL src1, long src2) {
-            return ~src1.value & src2;
+            if (src1 != null) {
+                return ~src1.value & src2;
+            } else {
+                return 0;
+            }
         }
 
         public long longExpr(Expr.MemL src1, Expr.MemL src2) {
-            return ~src1.value & src2.value;
+            if (src1 != null && src2 != null) {
+                return ~src1.value & src2.value;
+            } else {
+                return 0;
+            }
         }
 
 
@@ -82,15 +94,27 @@
         }
 
         public long longExpr(long src1, Expr.MemL src2) {
-            return src1 & ~src2.value;
+            if (src2 != null) {
+                return src1 & ~src2.value;
+            } else {
+                return 0;
+            }
         }
 
         public long longExpr(Expr.MemL src1, long src2) {
-            return src1.value & ~src2;
+            if (src1 != null) {
+                return src1.value & ~src2;
+            } else {
+                return 0;
+            }
         }
 
         public long longExpr(Expr.MemL src1, Expr.MemL src2) {
-            return src1.value & ~src2.value;
+            if (src1 != null && src2 != null) {
+                return src1.value & ~src2.value;
+            } else {
+                return 0;
+            }
         }
 
     }

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
- * @modules java.base/sun.misc
- *          java.management
- * @build AddnTestI
- * @run main ClassFileInstaller sun.hotspot.WhiteBox
- *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AddnTestI
- */
-
-import java.lang.reflect.Method;
-
-public class AddnTestI extends BmiIntrinsicBase.BmiTestCase {
-
-    protected AddnTestI(Method method) {
-        super(method);
-        // from intel manual VEX.NDS.LZ.0F38.W0 F2 /r, example c4e260f2c2
-        instrMask = new byte[]{
-                (byte) 0xFF,
-                (byte) 0x1F,
-                (byte) 0x00,
-                (byte) 0xFF};
-        instrPattern = new byte[]{
-                (byte) 0xC4, // prefix for 3-byte VEX instruction
-                (byte) 0x02, // 00010 implied 0F 38 leading opcode bytes
-                (byte) 0x00,
-                (byte) 0xF2};
-    }
-
-    public static void main(String[] args) throws Exception {
-        BmiIntrinsicBase.verifyTestCase(AddnTestI::new, TestAndnI.AndnIExpr.class.getDeclaredMethods());
-        BmiIntrinsicBase.verifyTestCase(AddnTestI::new, TestAndnI.AndnICommutativeExpr.class.getDeclaredMethods());
-    }
-}

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
- * @modules java.base/sun.misc
- *          java.management
- * @build AddnTestL
- * @run main ClassFileInstaller sun.hotspot.WhiteBox
- *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AddnTestL
- */
-
-import java.lang.reflect.Method;
-
-public class AddnTestL extends AddnTestI {
-
-    protected AddnTestL(Method method) {
-        super(method);
-        isLongOperation = true;
-    }
-
-    public static void main(String[] args) throws Exception {
-        BmiIntrinsicBase.verifyTestCase(AddnTestL::new, TestAndnL.AndnLExpr.class.getDeclaredMethods());
-        BmiIntrinsicBase.verifyTestCase(AddnTestL::new, TestAndnL.AndnLCommutativeExpr.class.getDeclaredMethods());
-    }
-}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8031321
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
+ * @modules java.base/sun.misc
+ *          java.management
+ * @build AndnTestI
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/bootclasspath -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *                         -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AndnTestI
+ */
+
+import java.lang.reflect.Method;
+
+public class AndnTestI extends BmiIntrinsicBase.BmiTestCase {
+
+    protected AndnTestI(Method method) {
+        super(method);
+        // from intel manual VEX.NDS.LZ.0F38.W0 F2 /r, example c4e260f2c2
+        instrMask = new byte[]{
+                (byte) 0xFF,
+                (byte) 0x1F,
+                (byte) 0x00,
+                (byte) 0xFF};
+        instrPattern = new byte[]{
+                (byte) 0xC4, // prefix for 3-byte VEX instruction
+                (byte) 0x02, // 00010 implied 0F 38 leading opcode bytes
+                (byte) 0x00,
+                (byte) 0xF2};
+    }
+
+    public static void main(String[] args) throws Exception {
+        BmiIntrinsicBase.verifyTestCase(AndnTestI::new, TestAndnI.AndnIExpr.class.getDeclaredMethods());
+        BmiIntrinsicBase.verifyTestCase(AndnTestI::new, TestAndnI.AndnICommutativeExpr.class.getDeclaredMethods());
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AndnTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8031321
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
+ * @modules java.base/sun.misc
+ *          java.management
+ * @build AndnTestL
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/bootclasspath -Xbatch -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *                         -XX:+IgnoreUnrecognizedVMOptions -XX:+UseBMI1Instructions AndnTestL
+ */
+
+import java.lang.reflect.Method;
+
+public class AndnTestL extends AndnTestI {
+
+    protected AndnTestL(Method method) {
+        super(method);
+        isLongOperation = true;
+    }
+
+    public static void main(String[] args) throws Exception {
+        BmiIntrinsicBase.verifyTestCase(AndnTestL::new, TestAndnL.AndnLExpr.class.getDeclaredMethods());
+        BmiIntrinsicBase.verifyTestCase(AndnTestL::new, TestAndnL.AndnLCommutativeExpr.class.getDeclaredMethods());
+    }
+}

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsiTestI

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsiTestL

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsmskTestI

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsmskTestL

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsrTestI

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsrTestL

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java	Tue Nov 24 10:30:23 2015 +0100
@@ -32,6 +32,7 @@
 import java.lang.reflect.Method;
 import java.util.concurrent.Callable;
 import java.util.function.Function;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class BmiIntrinsicBase extends CompilerWhiteBoxTest {

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build LZcntTestI
@@ -50,6 +50,8 @@
     public static void main(String[] args) throws Exception {
         // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods
         System.out.println("class java.lang.Integer should be loaded. Proof: " + Integer.class);
+        // Avoid uncommon traps.
+        System.out.println("Num leading zeroes: " + new TestLzcntI.LzcntIExpr().intExpr(12341341));
         BmiIntrinsicBase.verifyTestCase(LZcntTestI::new, TestLzcntI.LzcntIExpr.class.getDeclaredMethods());
     }

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build LZcntTestL
@@ -46,6 +46,8 @@
     public static void main(String[] args) throws Exception {
         // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods
         System.out.println("classes java.lang.Long should be loaded. Proof: " + Long.class);
+        // Avoid uncommon traps.
+        System.out.println("Num leading zeroes: " + new  TestLzcntL.LzcntLExpr().longExpr(12341341));
         BmiIntrinsicBase.verifyTestCase(LZcntTestL::new, TestLzcntL.LzcntLExpr.class.getDeclaredMethods());
     }
 }

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build TZcntTestI
@@ -50,6 +50,8 @@
     public static void main(String[] args) throws Exception {
         // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods
         System.out.println("class java.lang.Integer should be loaded. Proof: " + Integer.class);
+        // Avoid uncommon traps.
+        System.out.println("Num trailing zeroes: " + new TestTzcntI.TzcntIExpr().intExpr(12341341));
         BmiIntrinsicBase.verifyTestCase(TZcntTestI::new, TestTzcntI.TzcntIExpr.class.getDeclaredMethods());
     }

--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build TZcntTestL
@@ -46,6 +46,8 @@
     public static void main(String[] args) throws Exception {
         // j.l.Integer and Long should be loaded to allow a compilation of the methods that use their methods
         System.out.println("classes java.lang.Long should be loaded. Proof: " + Long.class);
+        // Avoid uncommon traps.
+        System.out.println("Num trailing zeroes: " + new TestTzcntL.TzcntLExpr().longExpr(12341341));
         BmiIntrinsicBase.verifyTestCase(TZcntTestL::new, TestTzcntL.TzcntLExpr.class.getDeclaredMethods());
     }
 }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/crc32/TestCRC32.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8143012
+ * @summary CRC32 Intrinsics support on SPARC
+ *
+ * @run main/othervm/timeout=720 -Xbatch TestCRC32 -m
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.Checksum;
+import java.util.zip.CRC32;
+
+public class TestCRC32 {
+    public static void main(String[] args) {
+        int offset = Integer.getInteger("offset", 0);
+        int msgSize = Integer.getInteger("msgSize", 512);
+        boolean multi = false;
+        int iters = 20000;
+        int warmupIters = 20000;
+
+        if (args.length > 0) {
+            if (args[0].equals("-m")) {
+                multi = true;
+            } else {
+                iters = Integer.valueOf(args[0]);
+            }
+            if (args.length > 1) {
+                warmupIters = Integer.valueOf(args[1]);
+            }
+        }
+
+        if (multi) {
+            test_multi(warmupIters);
+            return;
+        }
+
+        System.out.println(" offset = " + offset);
+        System.out.println("msgSize = " + msgSize + " bytes");
+        System.out.println("  iters = " + iters);
+
+        byte[] b = initializedBytes(msgSize, offset);
+
+        CRC32 crc0 = new CRC32();
+        CRC32 crc1 = new CRC32();
+        CRC32 crc2 = new CRC32();
+
+        crc0.update(b, offset, msgSize);
+
+        System.out.println("-------------------------------------------------------");
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+        }
+
+        /* measure performance */
+        long start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+        }
+        long end = System.nanoTime();
+        double total = (double)(end - start)/1e9;         // in seconds
+        double thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("CRC32.update(byte[]) runtime = " + total + " seconds");
+        System.out.println("CRC32.update(byte[]) throughput = " + thruput + " MB/s");
+
+        /* check correctness */
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+            if (!check(crc0, crc1)) break;
+        }
+        report("CRCs", crc0, crc1);
+
+        System.out.println("-------------------------------------------------------");
+
+        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
+        buf.put(b, offset, msgSize);
+        buf.flip();
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+        }
+
+        /* measure performance */
+        start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+        }
+        end = System.nanoTime();
+        total = (double)(end - start)/1e9;         // in seconds
+        thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("CRC32.update(ByteBuffer) runtime = " + total + " seconds");
+        System.out.println("CRC32.update(ByteBuffer) throughput = " + thruput + " MB/s");
+
+        /* check correctness */
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+            if (!check(crc0, crc2)) break;
+        }
+        report("CRCs", crc0, crc2);
+
+        System.out.println("-------------------------------------------------------");
+    }
+
+    private static void report(String s, Checksum crc0, Checksum crc1) {
+        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
+                          s, crc0.getValue(), crc1.getValue());
+    }
+
+    private static boolean check(Checksum crc0, Checksum crc1) {
+        if (crc0.getValue() != crc1.getValue()) {
+            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
+                              crc0.getValue(), crc1.getValue());
+            return false;
+        }
+        return true;
+    }
+
+    private static byte[] initializedBytes(int M, int offset) {
+        byte[] bytes = new byte[M + offset];
+        for (int i = 0; i < offset; i++) {
+            bytes[i] = (byte) i;
+        }
+        for (int i = offset; i < bytes.length; i++) {
+            bytes[i] = (byte) (i - offset);
+        }
+        return bytes;
+    }
+
+    private static void test_multi(int iters) {
+        int len1 = 8;    // the  8B/iteration loop
+        int len2 = 32;   // the 32B/iteration loop
+        int len3 = 4096; // the 4KB/iteration loop
+
+        byte[] b = initializedBytes(len3*16, 0);
+        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
+        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
+                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
+                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
+                        len2, len2+1, len2+3, len2+5, len2+7,
+                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
+                        len3, len3+1, len3+3, len3+5, len3+7,
+                        len3*2, len3*4, len3*8,
+                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
+                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
+                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
+                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
+                        len1+len2+len3+5, len1+len2+len3+7,
+                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
+                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
+                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
+                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
+        CRC32[] crc0 = new CRC32[offsets.length*sizes.length];
+        CRC32[] crc1 = new CRC32[offsets.length*sizes.length];
+        int i, j, k;
+
+        System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
+
+        /* set the result from interpreter as reference */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                crc0[i*sizes.length + j] = new CRC32();
+                crc1[i*sizes.length + j] = new CRC32();
+                crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+            }
+        }
+
+        /* warm up the JIT compiler and get result */
+        for (k = 0; k < iters; k++) {
+            for (i = 0; i < offsets.length; i++) {
+                for (j = 0; j < sizes.length; j++) {
+                    crc1[i*sizes.length + j].reset();
+                    crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+                }
+            }
+        }
+
+        /* check correctness */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) {
+                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
+                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
+                }
+            }
+        }
+    }
+}

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build AddExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build AddExactLongTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build DecrementExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build DecrementExactLongTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build IncrementExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build IncrementExactLongTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Tue Nov 24 10:30:23 2015 +0100
@@ -27,6 +27,7 @@
 import java.io.FileOutputStream;
 import java.lang.reflect.Executable;
 import java.util.Properties;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public abstract class IntrinsicBase extends CompilerWhiteBoxTest {
     protected String javaVmName;

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,6 +23,7 @@
 
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class MathIntrinsic {

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build MultiplyExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build MultiplyExactLongTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build NegateExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build NegateExactLongTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build SubtractExactIntTest

--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build SubtractExactLongTest

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/unsafe/TestUnsafeMismatchedArrayFieldAccess.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8142386
+ * @library /testlibrary /test/lib
+ * @summary Unsafe access to an array is wrongly marked as mismatched
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-TieredCompilation TestUnsafeMismatchedArrayFieldAccess
+ *
+ */
+
+import java.lang.reflect.*;
+import jdk.test.lib.Utils;
+import sun.misc.Unsafe;
+
+public class TestUnsafeMismatchedArrayFieldAccess {
+
+    private static final Unsafe UNSAFE = Utils.getUnsafe();
+
+    static {
+        try {
+            array_offset = UNSAFE.objectFieldOffset(TestUnsafeMismatchedArrayFieldAccess.class.getDeclaredField("array"));
+        }
+        catch (Exception e) {
+            throw new AssertionError(e);
+        }
+    }
+
+    int[] array;
+    static final long array_offset;
+
+    void m() {
+        UNSAFE.getObject(this, array_offset);
+    }
+
+    static public void main(String[] args) {
+        TestUnsafeMismatchedArrayFieldAccess test = new TestUnsafeMismatchedArrayFieldAccess();
+
+        for (int i = 0; i < 20000; i++) {
+            test.m();
+        }
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/unsafe/TestUnsafeUnalignedMismatchedAccesses.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8136473
+ * @summary Mismatched stores on same slice possible with Unsafe.Put*Unaligned methods
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestUnsafeUnalignedMismatchedAccesses
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+UnlockDiagnosticVMOptions -XX:-UseUnalignedAccesses TestUnsafeUnalignedMismatchedAccesses
+ *
+ */
+
+import java.lang.reflect.*;
+import jdk.internal.misc.Unsafe;
+
+public class TestUnsafeUnalignedMismatchedAccesses {
+
+    private static final Unsafe UNSAFE;
+
+    static {
+        try {
+            Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+            unsafeField.setAccessible(true);
+            UNSAFE = (Unsafe) unsafeField.get(null);
+        }
+        catch (Exception e) {
+            throw new AssertionError(e);
+        }
+    }
+
+    static void test1(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0);
+        array[0] = 0;
+    }
+
+    static void test2(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, 0);
+        array[0] = 0;
+    }
+
+    static void test3(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+2, 0);
+        array[0] = 0;
+    }
+
+    static void test4(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+3, 0);
+        array[0] = 0;
+    }
+
+    static void test5(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putInt(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0);
+        array[0] = 0;
+    }
+
+    // unaligned access and non escaping allocation
+    static void test6() {
+        byte[] array = new byte[10];
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1);
+        array[0] = 0;
+    }
+
+    // unaligned access and non escaping allocation
+    static int test7() {
+        byte[] array = new byte[10];
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1);
+        array[0] = 0;
+        array[2] = 0;
+        return array[0] + array[1] + array[2] + array[3] + array[4];
+    }
+
+    // unaligned access with vectorization
+    static void test8(int[] src1, int[] src2, int[] dst) {
+        for (int i = 0; i < dst.length-1; i++) {
+            int res = src1[i] + src2[i];
+            UNSAFE.putIntUnaligned(dst, UNSAFE.ARRAY_INT_BASE_OFFSET + i*4+1, res);
+        }
+    }
+
+    static public void main(String[] args) throws Exception {
+        byte[] byte_array = new byte[100];
+        int[] int_array = new int[100];
+        Object[] obj_array = new Object[100];
+        TestUnsafeUnalignedMismatchedAccesses test = new TestUnsafeUnalignedMismatchedAccesses();
+        for (int i = 0; i < 20000; i++) {
+            test1(byte_array);
+            test2(byte_array);
+            test3(byte_array);
+            test4(byte_array);
+            test5(byte_array);
+            test6();
+            test7();
+            test8(int_array, int_array, int_array);
+        }
+    }
+}

--- a/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -34,7 +34,6 @@
  * @run main/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI
  *      -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
  *      -XX:-BackgroundCompilation
-        -XX:+LogCompilation
  *      compiler.jvmci.compilerToVM.AllocateCompileIdTest
  */
 
@@ -45,22 +44,21 @@
 import java.lang.reflect.Executable;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.HashSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
-import compiler.jvmci.common.testcases.TestCase;
 import jdk.vm.ci.hotspot.CompilerToVMHelper;
 import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod;
 import jdk.test.lib.Asserts;
 import jdk.test.lib.Pair;
 import jdk.test.lib.Utils;
-import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
 
 public class AllocateCompileIdTest {
 
+    private static final int SOME_REPEAT_VALUE = 5;
     private final HashSet<Integer> ids = new HashSet<>();
 
     public static void main(String[] args) {
@@ -69,7 +67,6 @@
         createTestCasesIncorrectBci().forEach(test::runSanityIncorrectTest);
     }
 
-
     private static List<CompileCodeTestCase> createTestCasesCorrectBci() {
         List<CompileCodeTestCase> result = new ArrayList<>();
         try {
@@ -84,29 +81,29 @@
         return result;
     }
 
-
     private static List<Pair<CompileCodeTestCase, Class<? extends Throwable>>>
             createTestCasesIncorrectBci() {
         List<Pair<CompileCodeTestCase, Class<? extends Throwable>>> result
                 = new ArrayList<>();
-
         try {
             Class<?> aClass = DummyClass.class;
             Object receiver = new DummyClass();
             Method method = aClass.getMethod("dummyInstanceFunction");
             // greater than bytecode.length
-            int[] bcis = new int[] {30, 50, 200};
-            for (int bci : bcis) {
-                result.add(new Pair<>(
-                        new CompileCodeTestCase(receiver, method, bci),
-                        IllegalArgumentException.class));
-            }
-            bcis = new int[] {-4, -50, -200};
-            for (int bci : bcis) {
-                result.add(new Pair<>(
-                        new CompileCodeTestCase(receiver, method, bci),
-                        IllegalArgumentException.class));
-            }
+            byte[] bytecode = CompilerToVMHelper.getBytecode(CTVMUtilities
+                    .getResolvedMethod(method));
+            Stream.of(
+                    // greater than bytecode.length
+                    bytecode.length + 4,
+                    bytecode.length + 50,
+                    bytecode.length + 200,
+                    // negative cases
+                    -4, -50, -200)
+                    .map(bci -> new Pair<CompileCodeTestCase,
+                            Class<? extends Throwable>>(
+                            new CompileCodeTestCase(receiver, method, bci),
+                            IllegalArgumentException.class))
+                    .collect(Collectors.toList());
         } catch (NoSuchMethodException e) {
             throw new Error("TEST BUG : " + e.getMessage(), e);
         }
@@ -117,27 +114,20 @@
         System.out.println(testCase);
         Executable aMethod = testCase.executable;
         // to generate ciTypeFlow
-        System.out.println(testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes())));
+        testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes()));
         int bci = testCase.bci;
         HotSpotResolvedJavaMethod method = CTVMUtilities
                 .getResolvedMethod(aMethod);
-        int wbCompileID = getWBCompileID(testCase);
-        int id = CompilerToVMHelper.allocateCompileId(method, bci);
-        Asserts.assertNE(id, 0, testCase + " : zero compile id");
-
-        if (wbCompileID > 0) {
+        for (int i = 0; i < SOME_REPEAT_VALUE; ++i) {
+            int wbCompileID = getWBCompileID(testCase);
+            int id = CompilerToVMHelper.allocateCompileId(method, bci);
+            Asserts.assertNE(id, 0, testCase + " : zero compile id");
             Asserts.assertGT(id, wbCompileID, testCase
                     + " : allocated 'compile id' not  greater than existed");
-            if (!ids.add(wbCompileID)) {
-                throw new AssertionError(String.format(
-                        "%s : vm compilation allocated existed id -- %d",
-                        testCase, id));
-            }
-        }
-        if (!ids.add(id)) {
-            throw new AssertionError(String.format(
-                    "%s : allocateCompileId returned existed id %d",
-                    testCase, id));
+            Asserts.assertTrue(ids.add(wbCompileID), testCase
+                    + " : vm compilation allocated existing id " + id);
+            Asserts.assertTrue(ids.add(id), testCase
+                    + " : allocateCompileId returned existing id " + id);
         }
     }
 
@@ -156,8 +146,8 @@
 
     private int getWBCompileID(CompileCodeTestCase testCase) {
         NMethod nm = testCase.deoptimizeAndCompile();
-        if (nm == null) {
-            throw new Error("[TEST BUG] cannot compile method " + testCase);
+        if (nm == null || nm.compile_id <= 0) {
+            throw new Error("TEST BUG : cannot compile method " + testCase);
         }
         return nm.compile_id;
     }

--- a/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -45,18 +45,15 @@
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Random;
+
+import compiler.whitebox.CompilerWhiteBoxTest;
 import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod;
 import jdk.vm.ci.hotspot.CompilerToVMHelper;
 import jdk.vm.ci.meta.ProfilingInfo;
 import jdk.test.lib.Asserts;
-import jdk.test.lib.Utils;
-import sun.hotspot.WhiteBox;
 
 public class ReprofileTest {
 
-    private static final WhiteBox WB = WhiteBox.getWhiteBox();
-
     public static void main(String[] args) {
         List<Method> testCases = createTestCases();
         testCases.forEach(ReprofileTest::runSanityTest);
@@ -67,10 +64,10 @@
         try {
 
             Class<?> aClass = DummyClass.class;
-            testCases.add(aClass.getMethod("withLoop"));
+            testCases.add(aClass.getMethod("dummyInstanceFunction"));
 
             aClass = DummyClass.class;
-            testCases.add(aClass.getDeclaredMethod("dummyFunction"));
+            testCases.add(aClass.getMethod("dummyFunction"));
         } catch (NoSuchMethodException e) {
             throw new Error("TEST BUG " + e.getMessage(), e);
         }
@@ -78,17 +75,17 @@
     }
 
     private static void runSanityTest(Method aMethod) {
+        System.out.println(aMethod);
         HotSpotResolvedJavaMethod method = CTVMUtilities
                 .getResolvedMethod(aMethod);
         ProfilingInfo startProfile = method.getProfilingInfo();
         Asserts.assertFalse(startProfile.isMature(), aMethod
-                + " : profiling info is mature in the begging");
+                + " : profiling info is mature in the beginning");
 
-        long compileThreshold = (Long) WB.getVMFlag("CompileThreshold");
         // make interpreter to profile this method
         try {
             Object obj = aMethod.getDeclaringClass().newInstance();
-            for (long i = 0; i < compileThreshold; i++) {
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; i++) {
                 aMethod.invoke(obj);
             }
         } catch (ReflectiveOperationException e) {
@@ -99,10 +96,10 @@
         Asserts.assertNE(startProfile.toString(), compProfile.toString(),
                 String.format("%s : profiling info wasn't changed after "
                                 + "%d invocations",
-                        aMethod, compileThreshold));
+                        aMethod, CompilerWhiteBoxTest.THRESHOLD));
         Asserts.assertTrue(compProfile.isMature(),
                 String.format("%s is not mature after %d invocations",
-                        aMethod, compileThreshold));
+                        aMethod, CompilerWhiteBoxTest.THRESHOLD));
 
         CompilerToVMHelper.reprofile(method);
         ProfilingInfo reprofiledProfile = method.getProfilingInfo();

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/CodeInstallerTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.jvmci.errors;
+
+import java.lang.reflect.Method;
+
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.CodeCacheProvider;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.meta.MetaAccessProvider;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
+import jdk.vm.ci.runtime.JVMCI;
+import jdk.vm.ci.runtime.JVMCIBackend;
+
+import org.junit.Assert;
+
+public class CodeInstallerTest {
+
+    protected final Architecture arch;
+    protected final CodeCacheProvider codeCache;
+    protected final MetaAccessProvider metaAccess;
+    protected final HotSpotConstantReflectionProvider constantReflection;
+
+    protected final ResolvedJavaMethod dummyMethod;
+
+    public static void dummyMethod() {
+    }
+
+    protected CodeInstallerTest() {
+        JVMCIBackend backend = JVMCI.getRuntime().getHostJVMCIBackend();
+        metaAccess = backend.getMetaAccess();
+        codeCache = backend.getCodeCache();
+        constantReflection = (HotSpotConstantReflectionProvider) backend.getConstantReflection();
+        arch = codeCache.getTarget().arch;
+
+        Method method = null;
+        try {
+            method = CodeInstallerTest.class.getMethod("dummyMethod");
+        } catch (NoSuchMethodException e) {
+            Assert.fail();
+        }
+
+        dummyMethod = metaAccess.lookupJavaMethod(method);
+    }
+
+    protected void installCode(CompilationResult result) {
+        codeCache.addCode(dummyMethod, result, null, null);
+    }
+
+    protected CompilationResult createEmptyCompilationResult() {
+        CompilationResult ret = new CompilationResult();
+        ret.setTotalFrameSize(0);
+        return ret;
+    }
+
+    protected Register getRegister(PlatformKind kind, int index) {
+        Register[] allRegs = arch.getAvailableValueRegisters();
+        for (int i = 0; i < allRegs.length; i++) {
+            if (arch.canStoreValue(allRegs[i].getRegisterCategory(), kind)) {
+                if (index-- == 0) {
+                    return allRegs[i];
+                }
+            }
+        }
+        return null;
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidCompilationResult.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidCompilationResult
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.ConstantReference;
+import static jdk.vm.ci.code.CompilationResult.DataPatch;
+import static jdk.vm.ci.code.CompilationResult.DataSectionReference;
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+import static jdk.vm.ci.code.CompilationResult.Reference;
+import static jdk.vm.ci.code.DataSection.Data;
+import static jdk.vm.ci.code.DataSection.DataBuilder;
+import static jdk.vm.ci.meta.Assumptions.Assumption;
+
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.hotspot.HotSpotConstant;
+import jdk.vm.ci.meta.ResolvedJavaType;
+import jdk.vm.ci.meta.VMConstant;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in the code installer.
+ */
+public class TestInvalidCompilationResult extends CodeInstallerTest {
+
+    private static class InvalidAssumption extends Assumption {
+    }
+
+    private static class InvalidVMConstant implements VMConstant {
+
+        public boolean isDefaultForKind() {
+            return false;
+        }
+
+        public String toValueString() {
+            return null;
+        }
+    }
+
+    private static class InvalidReference extends Reference {
+
+        @Override
+        public int hashCode() {
+            return 0;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            return false;
+        }
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidAssumption() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.setAssumptions(new Assumption[]{new InvalidAssumption()});
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidAlignment() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.getDataSection().insertData(new Data(7, 1, DataBuilder.zero(1)));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullDataPatchInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(null);
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullReferenceInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), null));
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidDataSectionReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        DataSectionReference ref = result.getDataSection().insertData(new Data(1, 1, DataBuilder.zero(1)));
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), ref));
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidNarrowMethodInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        HotSpotConstant c = (HotSpotConstant) dummyMethod.getEncoding();
+        Data data = new Data(4, 4, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference((VMConstant) c.compress())));
+            buffer.putInt(0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullConstantInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference(null)));
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidConstantInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference(new InvalidVMConstant())));
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullReferenceInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, null);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullConstantInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new ConstantReference(null));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidConstantInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new ConstantReference(new InvalidVMConstant()));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new InvalidReference());
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testOutOfBoundsDataSectionReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        DataSectionReference ref = new DataSectionReference();
+        ref.setOffset(0x1000);
+        result.recordDataPatch(0, ref);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidMark() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordMark(0, new Object());
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidMarkInt() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordMark(0, -1);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullInfopoint() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(null);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnknownInfopointReason() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.UNKNOWN));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInfopointMissingDebugInfo() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.METHOD_START));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testSafepointMissingDebugInfo() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidDebugInfo.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidDebugInfo
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+
+import jdk.vm.ci.code.BytecodeFrame;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.DebugInfo;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.code.Location;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.code.StackSlot;
+import jdk.vm.ci.code.VirtualObject;
+import jdk.vm.ci.hotspot.HotSpotReferenceMap;
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.ResolvedJavaType;
+import jdk.vm.ci.meta.Value;
+import jdk.vm.ci.common.JVMCIError;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in debug info.
+ */
+public class TestInvalidDebugInfo extends CodeInstallerTest {
+
+    private static class UnknownJavaValue implements JavaValue {
+    }
+
+    private void test(JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) {
+        test(null, values, slotKinds, locals, stack, locks);
+    }
+
+    private void test(VirtualObject[] vobj, JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) {
+        BytecodeFrame frame = new BytecodeFrame(null, dummyMethod, 0, false, false, values, slotKinds, locals, stack, locks);
+        DebugInfo info = new DebugInfo(frame, vobj);
+        info.setReferenceMap(new HotSpotReferenceMap(new Location[0], new Location[0], new int[0], 8));
+
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullValues() {
+        test(null, new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSlotKinds() {
+        test(new JavaValue[0], null, 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedScopeValuesLength() {
+        test(new JavaValue[]{JavaConstant.FALSE}, new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedScopeSlotKindsLength() {
+        test(new JavaValue[0], new JavaKind[]{JavaKind.Boolean}, 0, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullValue() {
+        test(new JavaValue[]{null}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSlotKind() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{null}, 1, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullMonitor() {
+        test(new JavaValue[]{null}, new JavaKind[0], 0, 0, 1);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testWrongMonitorType() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[0], 0, 0, 1);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedIllegalValue() {
+        test(new JavaValue[]{Value.ILLEGAL}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeInCPURegister() {
+        Register reg = getRegister(arch.getPlatformKind(JavaKind.Int), 0);
+        test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeInFloatRegister() {
+        Register reg = getRegister(arch.getPlatformKind(JavaKind.Float), 0);
+        test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeOnStack() {
+        LIRKind kind = codeCache.getTarget().getLIRKind(JavaKind.Int);
+        StackSlot value = StackSlot.get(kind, 8, false);
+        test(new JavaValue[]{value}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testWrongConstantType() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{JavaKind.Object}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnsupportedConstantType() {
+        test(new JavaValue[]{JavaConstant.forShort((short) 0)}, new JavaKind[]{JavaKind.Short}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedNull() {
+        test(new JavaValue[]{JavaConstant.NULL_POINTER}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedObject() {
+        JavaValue wrapped = constantReflection.forObject(this);
+        test(new JavaValue[]{wrapped}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnknownJavaValue() {
+        test(new JavaValue[]{new UnknownJavaValue()}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testMissingIllegalAfterDouble() {
+        test(new JavaValue[]{JavaConstant.DOUBLE_0, JavaConstant.INT_0}, new JavaKind[]{JavaKind.Double, JavaKind.Int}, 2, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidVirtualObjectId() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o = VirtualObject.get(obj, 5);
+        o.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o}, new JavaValue[0], new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testDuplicateVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o1 = VirtualObject.get(obj, 0);
+        o1.setValues(new JavaValue[0], new JavaKind[0]);
+
+        VirtualObject o2 = VirtualObject.get(obj, 0);
+        o2.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o1, o2}, new JavaValue[0], new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o = VirtualObject.get(obj, 0);
+        o.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o}, new JavaValue[]{o}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUndefinedVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o0 = VirtualObject.get(obj, 0);
+        o0.setValues(new JavaValue[0], new JavaKind[0]);
+
+        VirtualObject o1 = VirtualObject.get(obj, 1);
+        o1.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o0}, new JavaValue[]{o1}, new JavaKind[]{JavaKind.Object}, 1, 0, 0);
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidOopMap.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidOopMap
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+
+import jdk.vm.ci.code.BytecodePosition;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.DebugInfo;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.code.Location;
+import jdk.vm.ci.code.ReferenceMap;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.hotspot.HotSpotReferenceMap;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.common.JVMCIError;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in oop maps.
+ */
+public class TestInvalidOopMap extends CodeInstallerTest {
+
+    private static class InvalidReferenceMap extends ReferenceMap {
+    }
+
+    private void test(ReferenceMap refMap) {
+        BytecodePosition pos = new BytecodePosition(null, dummyMethod, 0);
+        DebugInfo info = new DebugInfo(pos);
+        info.setReferenceMap(refMap);
+
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testMissingReferenceMap() {
+        test(null);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidReferenceMap() {
+        test(new InvalidReferenceMap());
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullOops() {
+        test(new HotSpotReferenceMap(null, new Location[0], new int[0], 8));
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullBase() {
+        test(new HotSpotReferenceMap(new Location[0], null, new int[0], 8));
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSize() {
+        test(new HotSpotReferenceMap(new Location[0], new Location[0], null, 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidLength() {
+        test(new HotSpotReferenceMap(new Location[1], new Location[2], new int[3], 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidShortOop() {
+        PlatformKind kind = arch.getPlatformKind(JavaKind.Short);
+        Register reg = getRegister(kind, 0);
+
+        Location[] oops = new Location[]{Location.register(reg)};
+        Location[] base = new Location[]{null};
+        int[] size = new int[]{kind.getSizeInBytes()};
+
+        test(new HotSpotReferenceMap(oops, base, size, 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidNarrowDerivedOop() {
+        if (!HotSpotVMConfig.config().useCompressedOops) {
+            throw new JVMCIError("skipping test");
+        }
+
+        PlatformKind kind = arch.getPlatformKind(JavaKind.Int);
+        Register reg = getRegister(kind, 0);
+        Register baseReg = getRegister(arch.getPlatformKind(JavaKind.Object), 1);
+
+        Location[] oops = new Location[]{Location.register(reg)};
+        Location[] base = new Location[]{Location.register(baseReg)};
+        int[] size = new int[]{kind.getSizeInBytes()};
+
+        test(new HotSpotReferenceMap(oops, base, size, 8));
+    }
+}

--- a/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -111,6 +111,8 @@
         Asserts.assertEQ(gotInstallNotification, 1,
                 "Got unexpected event count after 1st install attempt");
         // since "empty" compilation result is ok, a second attempt should be ok
+        compResult = new CompilationResult(METHOD_NAME); // create another instance with fresh state
+        compResult.setTotalFrameSize(0);
         codeCache.installCode(compRequest, compResult, /* installedCode = */ null, /* speculationLog = */ null,
                 /* isDefault = */ false);
         Asserts.assertEQ(gotInstallNotification, 2,

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/TestBestAlign.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2015 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8141624
+ * @summary Limit calculation of pre loop during super word optimization is wrong
+ * @run main/othervm TestBestAlign
+ * @author gunter.haug@sap.com
+ */
+
+public class TestBestAlign {
+
+    static final int initVal = -1;
+    static int intArray [];
+    static boolean boolArray[];
+    static int limit;
+    static public void clear() {
+        for (int i = 0; i < limit; i++) {
+            boolArray[1] = true;
+            intArray[i] = initVal;
+            boolArray[2] = true;
+        }
+    }
+
+    public static void main(String argv[]) throws Exception {
+        limit = 64;
+        boolArray = new boolean[8];
+        intArray = new int[limit + 4];
+        for (int i = 0; i < 10000000; ++i) {
+            if(i % 1000000 == 0)
+                System.out.println(i);
+            clear();
+        }
+    }
+}

--- a/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8073480
  * @summary explicit range checks should be recognized by C2
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @build  TestExplicitRangeChecks
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  * @run main ClassFileInstaller jdk.test.lib.Platform
@@ -41,6 +41,7 @@
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
 import sun.misc.Unsafe;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestExplicitRangeChecks {

--- a/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8066103
  * @summary C2's range check smearing allows out of bound array accesses
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TestRangeCheckSmearing
@@ -42,6 +42,7 @@
 import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestRangeCheckSmearing {
     private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/rangechecks/TestUncommonTrapMerging.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8140574
+ * @summary Verify proper re-execution of checks after merging of uncommon traps
+ * @run main/othervm -Xcomp -XX:-TieredCompilation -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test1
+ * @run main/othervm -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test2
+ */
+public class TestUncommonTrapMerging {
+
+    public static void main(String[] args) throws Throwable {
+        if (args.length < 1) {
+            throw new RuntimeException("Not enough arguments!");
+        }
+        TestUncommonTrapMerging mytest = new TestUncommonTrapMerging();
+        String testcase = args[0];
+        if (testcase.equals("Test1")) {
+            try {
+                // '42' should hit the 'arg > 0' check
+                mytest.test(42);
+
+            } catch (OutOfMemoryError e) {
+                // expected
+            }
+        } else if (testcase.equals("Test2")) {
+            // Compile test2 with uncommon traps at path 1 and path 2
+            for (int i = 0; i < 100_000; i++) {
+                mytest.test2(-1, 0);
+            }
+
+            // Compile test3 which inlines test2 with uncommon traps at
+            // path 1 and path 2. Because test3 always passes 'value = 1',
+            // C2 will remove the 'value > 0' check and then merge the two
+            // uncommon traps.
+            for (int i = 0; i < 100_000; i++) {
+                mytest.test3(0);
+            }
+
+            // This should return through path 2
+            if (!mytest.test3(42)) {
+                throw new RuntimeException("test2 returned through wrong path!");
+            }
+        }
+    }
+
+    public void test(int arg) throws Throwable {
+        // The following two checks should not be merged if the
+        // uncommon trap of the dominating if has 'Reason_unloaded'
+        // because we need to re-execute both checks after deopt.
+        if (arg < 0) {
+            throw new RuntimeException("Should not reach here");
+        } else if (arg > 0) {
+            throw new OutOfMemoryError();
+        }
+        throw new RuntimeException("Should not reach here");
+    }
+
+    public boolean test2(int arg, int value) {
+        if (arg < 0) {
+            if (value > 0) {
+                // path 1
+                return false;
+            }
+        } else if (arg > 0) {
+            // path 2
+            return true;
+        }
+        // path 3
+        return false;
+    }
+
+    public boolean test3(int arg) {
+        int i;
+        for (i = 0; i < 1; ++i) { }
+        // i == 1
+        return test2(arg, i);
+    }
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/stable/TestStableMemoryBarrier.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStableMemoryBarrier
+ * @bug 8139758
+ * @summary tests memory barrier correctly inserted for stable fields
+ * @library /testlibrary /../../test/lib
+ *
+ * @run main/bootclasspath -Xcomp -XX:CompileOnly=::testCompile
+ *                   java.lang.invoke.TestStableMemoryBarrier
+ *
+ * @author hui.shi@linaro.org
+ */
+package java.lang.invoke;
+
+import java.lang.reflect.InvocationTargetException;
+
+public class TestStableMemoryBarrier {
+
+    public static void main(String[] args) throws Exception {
+        run(NotDominate.class);
+
+    }
+
+    /* ====================================================
+     * Stable field initialized in method, but its allocation
+     * doesn't dominate MemBar Release at the end of method.
+     */
+
+    static class NotDominate{
+        public @Stable int v;
+        public static int[] array = new int[100];
+        public static NotDominate testCompile(int n) {
+           if ((n % 2) == 0) return null;
+           // add a loop here, trigger PhaseIdealLoop::verify_dominance
+           for (int i = 0; i < 100; i++) {
+              array[i] = n;
+           }
+           NotDominate nm = new NotDominate();
+           nm.v = n;
+           return nm;
+        }
+
+        public static void test() throws Exception {
+           for (int i = 0; i < 1000000; i++)
+               testCompile(i);
+        }
+    }
+
+    public static void run(Class<?> test) {
+        Throwable ex = null;
+        System.out.print(test.getName()+": ");
+        try {
+            test.getMethod("test").invoke(null);
+        } catch (InvocationTargetException e) {
+            ex = e.getCause();
+        } catch (Throwable e) {
+            ex = e;
+        } finally {
+            if (ex == null) {
+                System.out.println("PASSED");
+            } else {
+                System.out.println("FAILED");
+                ex.printStackTrace(System.out);
+            }
+        }
+    }
+}

--- a/hotspot/test/compiler/tiered/CompLevelsTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/CompLevelsTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -26,6 +26,9 @@
  *
  * @author igor.ignatyev@oracle.com
  */
+
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 public abstract class CompLevelsTest extends CompilerWhiteBoxTest {
     protected CompLevelsTest(TestCase testCase) {
         super(testCase);

--- a/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -23,10 +23,11 @@
 
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * @test ConstantGettersTransitionsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TransitionsTestExecutor ConstantGettersTransitionsTest

--- a/hotspot/test/compiler/tiered/LevelTransitionTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/LevelTransitionTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,10 +25,12 @@
 import java.lang.reflect.Method;
 import java.util.Objects;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
+import compiler.whitebox.SimpleTestCase;
 
 /**
  * @test LevelTransitionTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @ignore 8067651
@@ -36,7 +38,7 @@
  * @run main ClassFileInstaller sun.hotspot.WhiteBox sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm/timeout=240 -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+WhiteBoxAPI -XX:+TieredCompilation
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   -XX:CompileCommand=compileonly,ExtendedTestCase$CompileMethodHolder::*
  *                   TransitionsTestExecutor LevelTransitionTest
  * @summary Test the correctness of compilation level transitions for different methods

--- a/hotspot/test/compiler/tiered/NonTieredLevelsTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/NonTieredLevelsTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -22,17 +22,18 @@
  */
 
 import java.util.function.IntPredicate;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * @test NonTieredLevelsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build NonTieredLevelsTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:-TieredCompilation
  *                   -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   NonTieredLevelsTest
  * @summary Verify that only one level can be used
  * @author igor.ignatyev@oracle.com

--- a/hotspot/test/compiler/tiered/TieredLevelsTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/TieredLevelsTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,16 +21,18 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /**
  * @test TieredLevelsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build TieredLevelsTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:+TieredCompilation
  *                   -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   TieredLevelsTest
  * @summary Verify that all levels &lt; 'TieredStopAtLevel' can be used
  * @author igor.ignatyev@oracle.com

--- a/hotspot/test/compiler/tiered/TransitionsTestExecutor.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/tiered/TransitionsTestExecutor.java	Tue Nov 24 10:30:23 2015 +0100
@@ -29,6 +29,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * Executes given test in a separate VM with enabled Tiered Compilation for

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/types/TestMeetIncompatibleInterfaceArrays.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2015 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8141551
+ * @summary C2 can not handle returns with inccompatible interface arrays
+ * @modules java.base/jdk.internal.org.objectweb.asm
+ *          java.base/sun.misc
+ * @library /testlibrary /../../test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:-TieredCompilation
+ *        -XX:CICompilerCount=1
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=dontinline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 0
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:-TieredCompilation
+ *        -XX:CICompilerCount=1
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 1
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:Tier0InvokeNotifyFreqLog=0 -XX:Tier2InvokeNotifyFreqLog=0 -XX:Tier3InvokeNotifyFreqLog=0 -XX:Tier23InlineeNotifyFreqLog=0
+ *        -XX:Tier3InvocationThreshold=2 -XX:Tier3MinInvocationThreshold=2 -XX:Tier3CompileThreshold=2
+ *        -XX:Tier4InvocationThreshold=1 -XX:Tier4MinInvocationThreshold=1 -XX:Tier4CompileThreshold=1
+ *        -XX:+TieredCompilation
+ *        -XX:CICompilerCount=2
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=compileonly,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 2
+ *
+ * @author volker.simonis@gmail.com
+ */
+
+import java.io.FileOutputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+import sun.hotspot.WhiteBox;
+
+public class TestMeetIncompatibleInterfaceArrays extends ClassLoader {
+
+    private static final WhiteBox WB = WhiteBox.getWhiteBox();
+
+    public static interface I1 { public String getName(); }
+    public static interface I2 { public String getName(); }
+    public static class I2C implements I2 { public String getName() { return "I2";} }
+    public static class I21C implements I2, I1 { public String getName() { return "I2 and I1";} }
+
+    public static class Helper {
+        public static I2 createI2Array0() {
+            return new I2C();
+        }
+        public static I2[] createI2Array1() {
+            return new I2C[] { new I2C() };
+        }
+        public static I2[][] createI2Array2() {
+            return new I2C[][] { new I2C[] { new I2C() } };
+        }
+        public static I2[][][] createI2Array3() {
+            return new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } };
+        }
+        public static I2[][][][] createI2Array4() {
+            return new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } };
+        }
+        public static I2[][][][][] createI2Array5() {
+            return new I2C[][][][][] { new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } } };
+        }
+        public static I2 createI21Array0() {
+            return new I21C();
+        }
+        public static I2[] createI21Array1() {
+            return new I21C[] { new I21C() };
+        }
+        public static I2[][] createI21Array2() {
+            return new I21C[][] { new I21C[] { new I21C() } };
+        }
+        public static I2[][][] createI21Array3() {
+            return new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } };
+        }
+        public static I2[][][][] createI21Array4() {
+            return new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } };
+        }
+        public static I2[][][][][] createI21Array5() {
+            return new I21C[][][][][] { new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } } };
+        }
+    }
+
+    // Location for the generated class files
+    public static final String PATH = System.getProperty("test.classes", ".") + java.io.File.separator;
+
+    /*
+     * With 'good == false' this helper method creates the following classes
+     * (using the nested 'Helper' class and the nested interfaces 'I1' and 'I2').
+     * For brevity I omit the enclosing class 'TestMeetIncompatibleInterfaceArrays' in the
+     * following examples:
+     *
+     * public class MeetIncompatibleInterfaceArrays0ASM {
+     *   public static I1 run() {
+     *     return Helper.createI2Array0(); // returns I2
+     *   }
+     *   public static void test() {
+     *     I1 i1 = run();
+     *     System.out.println(i1.getName());
+     *   }
+     * }
+     * public class MeetIncompatibleInterfaceArrays1ASM {
+     *   public static I1[] run() {
+     *     return Helper.createI2Array1(); // returns I2[]
+     *   }
+     *   public static void test() {
+     *     I1[] i1 = run();
+     *     System.out.println(i1[0].getName());
+     *   }
+     * }
+     * ...
+     * // MeetIncompatibleInterfaceArrays4ASM is special because it creates
+     * // an illegal class which will be rejected by the verifier.
+     * public class MeetIncompatibleInterfaceArrays4ASM {
+     *   public static I1[][][][] run() {
+     *     return Helper.createI2Array3(); // returns I1[][][] which gives a verifier error because return expects I1[][][][]
+     *   }
+     *   public static void test() {
+     *     I1[][][][][] i1 = run();
+     *     System.out.println(i1[0][0][0][0][0].getName());
+     *   }
+     * ...
+     * public class MeetIncompatibleInterfaceArrays5ASM {
+     *   public static I1[][][][][] run() {
+     *     return Helper.createI2Array5(); // returns I2[][][][][]
+     *   }
+     *   public static void test() {
+     *     I1[][][][][] i1 = run();
+     *     System.out.println(i1[0][0][0][0][0].getName());
+     *   }
+     * }
+     *
+     * Notice that this is not legal Java code. We would have to use a cast in "run()" to make it legal:
+     *
+     *   public static I1[] run() {
+     *     return (I1[])Helper.createI2Array1(); // returns I2[]
+     *   }
+     *
+     * But in pure bytecode, the "run()" methods are perfectly legal:
+     *
+     *   public static I1[] run();
+     *     Code:
+     *       0: invokestatic  #16  // Method Helper.createI2Array1:()[LI2;
+     *       3: areturn
+     *
+     * The "test()" method calls the "getName()" function from I1 on the objects returned by "run()".
+     * This will epectedly fail with an "IncompatibleClassChangeError" because the objects returned
+     * by "run()" (and by createI2Array()) are actually of type "I2C" and only implement "I2" but not "I1".
+     *
+     *
+     * With 'good == true' this helper method will create the following classes:
+     *
+     * public class MeetIncompatibleInterfaceArraysGood0ASM {
+     *   public static I1 run() {
+     *     return Helper.createI21Array0(); // returns I2
+     *   }
+     *   public static void test() {
+     *     I1 i1 = run();
+     *     System.out.println(i1.getName());
+     *   }
+     * }
+     *
+     * Calling "test()" on these objects will succeed and output "I2 and I1" because now the "run()"
+     * method calls "createI21Array()" which actually return an object (or an array of objects) of
+     * type "I21C" which implements both "I2" and "I1".
+     *
+     * Notice that at the bytecode level, the code for the "run()" and "test()" methods in
+     * "MeetIncompatibleInterfaceArraysASM" and "MeetIncompatibleInterfaceArraysGoodASM" look exactly
+     * the same. I.e. the verifier has no chance to verify if the I2 object returned by "createI1Array()"
+     * or "createI21Array()" implements "I1" or not. That's actually the reason why both versions of
+     * generated classes are legal from a verifier point of view.
+     *
+     */
+    static void generateTestClass(int dim, boolean good) throws Exception {
+        String baseClassName = "MeetIncompatibleInterfaceArrays";
+        if (good)
+            baseClassName += "Good";
+        String createName = "createI2" + (good ? "1" : "") + "Array";
+        String a = "";
+        for (int i = 0; i < dim; i++)
+            a += "[";
+        ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES);
+        cw.visit(V1_8, ACC_PUBLIC, baseClassName + dim + "ASM", null, "java/lang/Object", null);
+        MethodVisitor constr = cw.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null);
+        constr.visitCode();
+        constr.visitVarInsn(ALOAD, 0);
+        constr.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+        constr.visitInsn(RETURN);
+        constr.visitMaxs(0, 0);
+        constr.visitEnd();
+        MethodVisitor run = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "run",
+                "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", null, null);
+        run.visitCode();
+        if (dim == 4) {
+            run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + 3,
+                    "()" + "[[[" + "LTestMeetIncompatibleInterfaceArrays$I2;", false);
+        } else {
+            run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + dim,
+                    "()" + a + "LTestMeetIncompatibleInterfaceArrays$I2;", false);
+        }
+        run.visitInsn(ARETURN);
+        run.visitMaxs(0, 0);
+        run.visitEnd();
+        MethodVisitor test = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "test", "()V", null, null);
+        test.visitCode();
+        test.visitMethodInsn(INVOKESTATIC, baseClassName + dim + "ASM", "run",
+                "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", false);
+        test.visitVarInsn(ASTORE, 0);
+        if (dim > 0) {
+            test.visitVarInsn(ALOAD, 0);
+            for (int i = 1; i <= dim; i++) {
+                test.visitInsn(ICONST_0);
+                test.visitInsn(AALOAD);
+            }
+            test.visitVarInsn(ASTORE, 1);
+        }
+        test.visitFieldInsn(GETSTATIC, "java/lang/System", "out", "Ljava/io/PrintStream;");
+        test.visitVarInsn(ALOAD, dim > 0 ? 1 : 0);
+        test.visitMethodInsn(INVOKEINTERFACE, "TestMeetIncompatibleInterfaceArrays$I1", "getName",
+                "()Ljava/lang/String;", true);
+        test.visitMethodInsn(INVOKEVIRTUAL, "java/io/PrintStream", "println", "(Ljava/lang/Object;)V", false);
+        test.visitInsn(RETURN);
+        test.visitMaxs(0, 0);
+        test.visitEnd();
+
+        // Get the bytes of the class..
+        byte[] b = cw.toByteArray();
+        // ..and write them into a class file (for debugging)
+        FileOutputStream fos = new FileOutputStream(PATH + baseClassName + dim + "ASM.class");
+        fos.write(b);
+        fos.close();
+
+    }
+
+    public static String[][] tier = { { "interpreted", "C2 (tier 4) without inlining", "C2 (tier4) without inlining" },
+            { "interpreted", "C2 (tier 4) with inlining", "C2 (tier4) with inlining" },
+            { "interpreted", "C1 (tier 3) with inlining", "C2 (tier4) with inlining" } };
+
+    public static void main(String[] args) throws Exception {
+        final int pass = Integer.parseInt(args.length > 0 ? args[0] : "0");
+
+        // Load and initialize some classes required for compilation
+        Class.forName("TestMeetIncompatibleInterfaceArrays$I1");
+        Class.forName("TestMeetIncompatibleInterfaceArrays$I2");
+        Class.forName("TestMeetIncompatibleInterfaceArrays$Helper");
+
+        for (int g = 0; g < 2; g++) {
+            String baseClassName = "MeetIncompatibleInterfaceArrays";
+            boolean good = (g == 0) ? false : true;
+            if (good)
+                baseClassName += "Good";
+            for (int i = 0; i < 6; i++) {
+                System.out.println();
+                System.out.println("Creating " + baseClassName + i + "ASM.class");
+                System.out.println("========================================" + "=" + "=========");
+                // Create the "MeetIncompatibleInterfaceArrays<i>ASM" class
+                generateTestClass(i, good);
+                Class<?> c = null;
+                try {
+                    c = Class.forName(baseClassName + i + "ASM");
+                } catch (VerifyError ve) {
+                    if (i == 4) {
+                        System.out.println("OK - must be (" + ve.getMessage() + ").");
+                    } else {
+                        throw ve;
+                    }
+                    continue;
+                }
+                // Call MeetIncompatibleInterfaceArrays<i>ASM.test()
+                Method m = c.getMethod("test");
+                Method r = c.getMethod("run");
+                for (int j = 0; j < 3; j++) {
+                    System.out.println((j + 1) + ". invokation of " + baseClassName + i + "ASM.test() [should be "
+                            + tier[pass][j] + "]");
+                    try {
+                        m.invoke(null);
+                    } catch (InvocationTargetException ite) {
+                        if (good) {
+                            throw ite;
+                        } else {
+                            if (ite.getCause() instanceof IncompatibleClassChangeError) {
+                                System.out.println("  OK - catched InvocationTargetException("
+                                        + ite.getCause().getMessage() + ").");
+                            } else {
+                                throw ite;
+                            }
+                        }
+                    }
+                }
+                System.out.println("Method " + r + (WB.isMethodCompiled(r) ? " has" : " has not") + " been compiled.");
+                if (!WB.isMethodCompiled(r)) {
+                    throw new Exception("Method " + r + " must be compiled!");
+                }
+            }
+        }
+    }
+}

--- a/hotspot/test/compiler/whitebox/ClearMethodStateTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/ClearMethodStateTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -22,16 +22,17 @@
  */
 
 import java.util.function.Function;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test ClearMethodStateTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build ClearMethodStateTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* ClearMethodStateTest
+ * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* ClearMethodStateTest
  * @summary testing of WB::clearMethodState()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -20,13 +20,11 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+package compiler.whitebox;
 
 import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
-
-import java.lang.reflect.Constructor;
 import java.lang.reflect.Executable;
-import java.lang.reflect.Method;
 import java.util.Objects;
 import java.util.concurrent.Callable;
 import java.util.function.Function;
@@ -38,19 +36,19 @@
  */
 public abstract class CompilerWhiteBoxTest {
     /** {@code CompLevel::CompLevel_none} -- Interpreter */
-    protected static final int COMP_LEVEL_NONE = 0;
+    public static final int COMP_LEVEL_NONE = 0;
     /** {@code CompLevel::CompLevel_any}, {@code CompLevel::CompLevel_all} */
-    protected static final int COMP_LEVEL_ANY = -1;
+    public static final int COMP_LEVEL_ANY = -1;
     /** {@code CompLevel::CompLevel_simple} -- C1 */
-    protected static final int COMP_LEVEL_SIMPLE = 1;
+    public static final int COMP_LEVEL_SIMPLE = 1;
     /** {@code CompLevel::CompLevel_limited_profile} -- C1, invocation &amp; backedge counters */
-    protected static final int COMP_LEVEL_LIMITED_PROFILE = 2;
+    public static final int COMP_LEVEL_LIMITED_PROFILE = 2;
     /** {@code CompLevel::CompLevel_full_profile} -- C1, invocation &amp; backedge counters + mdo */
-    protected static final int COMP_LEVEL_FULL_PROFILE = 3;
+    public static final int COMP_LEVEL_FULL_PROFILE = 3;
     /** {@code CompLevel::CompLevel_full_optimization} -- C2 or Shark */
-    protected static final int COMP_LEVEL_FULL_OPTIMIZATION = 4;
+    public static final int COMP_LEVEL_FULL_OPTIMIZATION = 4;
     /** Maximal value for CompLevel */
-    protected static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION;
+    public static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION;
 
     /** Instance of WhiteBox */
     protected static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
@@ -70,7 +68,7 @@
     protected static final boolean IS_VERBOSE
             = System.getProperty("verbose") != null;
     /** invocation count to trigger compilation */
-    protected static final int THRESHOLD;
+    public static final int THRESHOLD;
     /** invocation count to trigger OSR compilation */
     protected static final long BACKEDGE_THRESHOLD;
     /** Value of {@code java.vm.info} (interpreted|mixed|comp mode) */
@@ -312,7 +310,7 @@
      *
      * @param executable Executable
      */
-    protected static final void waitBackgroundCompilation(Executable executable) {
+    public static final void waitBackgroundCompilation(Executable executable) {
         if (!BACKGROUND_COMPILATION) {
             return;
         }
@@ -441,7 +439,7 @@
      * @return {@code true} if the test should be skipped,
      *         {@code false} otherwise
      */
-    protected static boolean skipOnTieredCompilation(boolean value) {
+    public static boolean skipOnTieredCompilation(boolean value) {
         if (value == CompilerWhiteBoxTest.TIERED_COMPILATION) {
             System.err.println("Test isn't applicable w/ "
                     + (value ? "enabled" : "disabled")
@@ -452,256 +450,3 @@
     }
 }
 
-enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
-    /** constructor test case */
-    CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false),
-    /** method test case */
-    METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false),
-    /** static method test case */
-    STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false),
-    /** OSR constructor test case */
-    OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR,
-            Helper.OSR_CONSTRUCTOR_CALLABLE, true),
-    /** OSR method test case */
-    OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true),
-    /** OSR static method test case */
-    OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true);
-
-    private final Executable executable;
-    private final Callable<Integer> callable;
-    private final boolean isOsr;
-
-    private SimpleTestCase(Executable executable, Callable<Integer> callable,
-            boolean isOsr) {
-        this.executable = executable;
-        this.callable = callable;
-        this.isOsr = isOsr;
-    }
-
-    @Override
-    public Executable getExecutable() {
-        return executable;
-    }
-
-    @Override
-    public Callable<Integer> getCallable() {
-        return callable;
-    }
-
-    @Override
-    public boolean isOsr() {
-        return isOsr;
-    }
-
-    private static class Helper {
-
-        private static final Callable<Integer> CONSTRUCTOR_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return new Helper(1337).hashCode();
-            }
-        };
-
-        private static final Callable<Integer> METHOD_CALLABLE
-                = new Callable<Integer>() {
-            private final Helper helper = new Helper();
-
-            @Override
-            public Integer call() throws Exception {
-                return helper.method();
-            }
-        };
-
-        private static final Callable<Integer> STATIC_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return staticMethod();
-            }
-        };
-
-        private static final Callable<Integer> OSR_CONSTRUCTOR_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode();
-            }
-        };
-
-        private static final Callable<Integer> OSR_METHOD_CALLABLE
-                = new Callable<Integer>() {
-            private final Helper helper = new Helper();
-
-            @Override
-            public Integer call() throws Exception {
-                return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
-            }
-        };
-
-        private static final Callable<Integer> OSR_STATIC_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
-            }
-        };
-
-        private static final Constructor CONSTRUCTOR;
-        private static final Constructor OSR_CONSTRUCTOR;
-        private static final Method METHOD;
-        private static final Method STATIC;
-        private static final Method OSR_METHOD;
-        private static final Method OSR_STATIC;
-
-        static {
-            try {
-                CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper.<init>(int)", e);
-            }
-            try {
-                OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor(
-                        Object.class, long.class);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper.<init>(Object, long)", e);
-            }
-            METHOD = getMethod("method");
-            STATIC = getMethod("staticMethod");
-            OSR_METHOD = getMethod("osrMethod", long.class);
-            OSR_STATIC = getMethod("osrStaticMethod", long.class);
-        }
-
-        private static Method getMethod(String name, Class<?>... parameterTypes) {
-            try {
-                return Helper.class.getDeclaredMethod(name, parameterTypes);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper." + name, e);
-            }
-        }
-
-        private static int staticMethod() {
-            return 1138;
-        }
-
-        private int method() {
-            return 42;
-        }
-
-        /**
-         * Deoptimizes all non-osr versions of the given executable after
-         * compilation finished.
-         *
-         * @param e Executable
-         * @throws Exception
-         */
-        private static void waitAndDeoptimize(Executable e) {
-            CompilerWhiteBoxTest.waitBackgroundCompilation(e);
-            if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) {
-                throw new RuntimeException(e + " must not be in queue");
-            }
-            // Deoptimize non-osr versions of executable
-            WhiteBox.getWhiteBox().deoptimizeMethod(e, false);
-        }
-
-        /**
-         * Executes the method multiple times to make sure we have
-         * enough profiling information before triggering an OSR
-         * compilation. Otherwise the C2 compiler may add uncommon traps.
-         *
-         * @param m Method to be executed
-         * @return Number of times the method was executed
-         * @throws Exception
-         */
-        private static int warmup(Method m) throws Exception {
-            waitAndDeoptimize(m);
-            Helper helper = new Helper();
-            int result = 0;
-            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
-                result += (int)m.invoke(helper, 1);
-            }
-            // Wait to make sure OSR compilation is not blocked by
-            // non-OSR compilation in the compile queue
-            CompilerWhiteBoxTest.waitBackgroundCompilation(m);
-            return result;
-        }
-
-        /**
-         * Executes the constructor multiple times to make sure we
-         * have enough profiling information before triggering an OSR
-         * compilation. Otherwise the C2 compiler may add uncommon traps.
-         *
-         * @param c Constructor to be executed
-         * @return Number of times the constructor was executed
-         * @throws Exception
-         */
-        private static int warmup(Constructor c) throws Exception {
-            waitAndDeoptimize(c);
-            int result = 0;
-            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
-                result += c.newInstance(null, 1).hashCode();
-            }
-            // Wait to make sure OSR compilation is not blocked by
-            // non-OSR compilation in the compile queue
-            CompilerWhiteBoxTest.waitBackgroundCompilation(c);
-            return result;
-        }
-
-        private static int osrStaticMethod(long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_STATIC);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += staticMethod();
-            }
-            return result;
-        }
-
-        private int osrMethod(long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_METHOD);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += method();
-            }
-            return result;
-        }
-
-        private final int x;
-
-        // for method and OSR method test case
-        public Helper() {
-            x = 0;
-        }
-
-        // for OSR constructor test case
-        private Helper(Object o, long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_CONSTRUCTOR);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += method();
-            }
-            x = result;
-        }
-
-        // for constructor test case
-        private Helper(int x) {
-            this.x = x;
-        }
-
-        @Override
-        public int hashCode() {
-            return x;
-        }
-    }
-}

--- a/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeAllTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeAllTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeAllTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeAllTest
  * @summary testing of WB::deoptimizeAll()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,10 +21,12 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeFramesTest
  * @bug 8028595
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeFramesTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox

--- a/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeMethodTest
  * @summary testing of WB::deoptimizeMethod()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,11 +24,12 @@
 import sun.hotspot.WhiteBox;
 import java.lang.reflect.Executable;
 import java.lang.reflect.Method;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test DeoptimizeMultipleOSRTest
  * @bug 8061817
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeMultipleOSRTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox

--- a/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test EnqueueMethodForCompilationTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build EnqueueMethodForCompilationTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* EnqueueMethodForCompilationTest
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* EnqueueMethodForCompilationTest
  * @summary testing of WB::enqueueMethodForCompilation()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -30,18 +30,19 @@
 
 import jdk.test.lib.Asserts;
 import jdk.test.lib.InfiniteLoop;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test
  * @bug 8059624 8064669
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build ForceNMethodSweepTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:-TieredCompilation -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   -XX:-BackgroundCompilation ForceNMethodSweepTest
  * @summary testing of WB::forceNMethodSweep
  */

--- a/hotspot/test/compiler/whitebox/GetNMethodTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/GetNMethodTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -25,16 +25,17 @@
 import sun.hotspot.code.BlobType;
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Asserts;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test GetNMethodTest
  * @bug 8038240
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build GetNMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* GetNMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* GetNMethodTest
  * @summary testing of WB::getNMethod()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test IsMethodCompilableTest
  * @bug 8007270 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.base/sun.misc
  *          java.management
  * @build jdk.test.lib.* sun.hotspot.WhiteBox
@@ -32,12 +32,13 @@
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main ClassFileInstaller jdk.test.lib.Platform
- * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* IsMethodCompilableTest
+ * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* IsMethodCompilableTest
  * @summary testing of WB::isMethodCompilable()
  * @author igor.ignatyev@oracle.com
  */
 
 import jdk.test.lib.Platform;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class IsMethodCompilableTest extends CompilerWhiteBoxTest {
     /**

--- a/hotspot/test/compiler/whitebox/LockCompilationTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/LockCompilationTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -24,12 +24,12 @@
 /*
  * @test LockCompilationTest
  * @bug 8059624
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build LockCompilationTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* LockCompilationTest
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* LockCompilationTest
  * @summary testing of WB::lock/unlockCompilation()
  */
 
@@ -37,7 +37,7 @@
 import java.io.PrintWriter;
 import java.util.concurrent.BrokenBarrierException;
 import java.util.concurrent.CyclicBarrier;
-
+import compiler.whitebox.CompilerWhiteBoxTest;
 import jdk.test.lib.Asserts;
 
 public class LockCompilationTest extends CompilerWhiteBoxTest {

--- a/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test MakeMethodNotCompilableTest
  * @bug 8012322 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build MakeMethodNotCompilableTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* MakeMethodNotCompilableTest
+ * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* MakeMethodNotCompilableTest
  * @summary testing of WB::makeMethodNotCompilable()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test SetDontInlineMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build SetDontInlineMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetDontInlineMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetDontInlineMethodTest
  * @summary testing of WB::testSetDontInlineMethod()
  * @author igor.ignatyev@oracle.com
  */

--- a/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java	Tue Nov 24 10:30:23 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test SetForceInlineMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build SetForceInlineMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetForceInlineMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetForceInlineMethodTest
  * @summary testing of WB::testSetForceInlineMethod()
  * @author igor.ignatyev@oracle.com
  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/whitebox/SimpleTestCase.java	Tue Nov 24 10:30:23 2015 +0100
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.whitebox;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Executable;
+import java.lang.reflect.Method;
+import java.util.concurrent.Callable;
+import sun.hotspot.WhiteBox;
+
+public enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
+    /** constructor test case */
+    CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false),
+    /** method test case */
+    METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false),
+    /** static method test case */
+    STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false),
+    /** OSR constructor test case */
+    OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR,
+            Helper.OSR_CONSTRUCTOR_CALLABLE, true),
+    /** OSR method test case */
+    OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true),
+    /** OSR static method test case */
+    OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true);
+
+    private final Executable executable;
+    private final Callable<Integer> callable;
+    private final boolean isOsr;
+
+    private SimpleTestCase(Executable executable, Callable<Integer> callable,
+            boolean isOsr) {
+        this.executable = executable;
+        this.callable = callable;
+        this.isOsr = isOsr;
+    }
+
+    @Override
+    public Executable getExecutable() {
+        return executable;
+    }
+
+    @Override
+    public Callable<Integer> getCallable() {
+        return callable;
+    }
+
+    @Override
+    public boolean isOsr() {
+        return isOsr;
+    }
+
+    private static class Helper {
+
+        private static final Callable<Integer> CONSTRUCTOR_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return new Helper(1337).hashCode();
+            }
+        };
+
+        private static final Callable<Integer> METHOD_CALLABLE
+                = new Callable<Integer>() {
+            private final Helper helper = new Helper();
+
+            @Override
+            public Integer call() throws Exception {
+                return helper.method();
+            }
+        };
+
+        private static final Callable<Integer> STATIC_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return staticMethod();
+            }
+        };
+
+        private static final Callable<Integer> OSR_CONSTRUCTOR_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode();
+            }
+        };
+
+        private static final Callable<Integer> OSR_METHOD_CALLABLE
+                = new Callable<Integer>() {
+            private final Helper helper = new Helper();
+
+            @Override
+            public Integer call() throws Exception {
+                return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
+            }
+        };
+
+        private static final Callable<Integer> OSR_STATIC_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
+            }
+        };
+
+        private static final Constructor CONSTRUCTOR;
+        private static final Constructor OSR_CONSTRUCTOR;
+        private static final Method METHOD;
+        private static final Method STATIC;
+        private static final Method OSR_METHOD;
+        private static final Method OSR_STATIC;
+
+        static {
+            try {
+                CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper.<init>(int)", e);
+            }
+            try {
+                OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor(
+                        Object.class, long.class);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper.<init>(Object, long)", e);
+            }
+            METHOD = getMethod("method");
+            STATIC = getMethod("staticMethod");
+            OSR_METHOD = getMethod("osrMethod", long.class);
+            OSR_STATIC = getMethod("osrStaticMethod", long.class);
+        }
+
+        private static Method getMethod(String name, Class<?>... parameterTypes) {
+            try {
+                return Helper.class.getDeclaredMethod(name, parameterTypes);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper." + name, e);
+            }
+        }
+
+        private static int staticMethod() {
+            return 1138;
+        }
+
+        private int method() {
+            return 42;
+        }
+
+        /**
+         * Deoptimizes all non-osr versions of the given executable after
+         * compilation finished.
+         *
+         * @param e Executable
+         * @throws Exception
+         */
+        private static void waitAndDeoptimize(Executable e) {
+            CompilerWhiteBoxTest.waitBackgroundCompilation(e);
+            if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) {
+                throw new RuntimeException(e + " must not be in queue");
+            }
+            // Deoptimize non-osr versions of executable
+            WhiteBox.getWhiteBox().deoptimizeMethod(e, false);
+        }
+
+        /**
+         * Executes the method multiple times to make sure we have
+         * enough profiling information before triggering an OSR
+         * compilation. Otherwise the C2 compiler may add uncommon traps.
+         *
+         * @param m Method to be executed
+         * @return Number of times the method was executed
+         * @throws Exception
+         */
+        private static int warmup(Method m) throws Exception {
+            waitAndDeoptimize(m);
+            Helper helper = new Helper();
+            int result = 0;
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
+                result += (int)m.invoke(helper, 1);
+            }
+            // Wait to make sure OSR compilation is not blocked by
+            // non-OSR compilation in the compile queue
+            CompilerWhiteBoxTest.waitBackgroundCompilation(m);
+            return result;
+        }
+
+        /**
+         * Executes the constructor multiple times to make sure we
+         * have enough profiling information before triggering an OSR
+         * compilation. Otherwise the C2 compiler may add uncommon traps.
+         *
+         * @param c Constructor to be executed
+         * @return Number of times the constructor was executed
+         * @throws Exception
+         */
+        private static int warmup(Constructor c) throws Exception {
+            waitAndDeoptimize(c);
+            int result = 0;
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
+                result += c.newInstance(null, 1).hashCode();
+            }
+            // Wait to make sure OSR compilation is not blocked by
+            // non-OSR compilation in the compile queue
+            CompilerWhiteBoxTest.waitBackgroundCompilation(c);
+            return result;
+        }
+
+        private static int osrStaticMethod(long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_STATIC);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += staticMethod();
+            }
+            return result;
+        }
+
+        private int osrMethod(long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_METHOD);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += method();
+            }
+            return result;
+        }
+
+        private final int x;
+
+        // for method and OSR method test case
+        public Helper() {
+            x = 0;
+        }
+
+        // for OSR constructor test case
+        private Helper(Object o, long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_CONSTRUCTOR);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += method();
+            }
+            x = result;
+        }
+
+        // for constructor test case
+        private Helper(int x) {
+            this.x = x;
+        }
+
+        @Override
+        public int hashCode() {
+            return x;
+        }
+    }
+}

--- a/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Tue Nov 24 10:30:23 2015 +0100
@@ -59,6 +59,13 @@
         allOptionsAsMap.remove("ThreadStackSize");
 
         /*
+         * Remove the flag controlling the size of the stack because the
+         * flag has direct influence on the physical memory usage of
+         * the VM.
+         */
+        allOptionsAsMap.remove("CompilerThreadStackSize");
+
+        /*
          * Exclude MallocMaxTestWords as it is expected to exit VM at small values (>=0)
          */
         allOptionsAsMap.remove("MallocMaxTestWords");

--- a/hotspot/test/serviceability/dcmd/compiler/control2.txt	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/serviceability/dcmd/compiler/control2.txt	Tue Nov 24 10:30:23 2015 +0100
@@ -11,7 +11,7 @@
     },
     inline : [ "+javax/util.*", "-comx/sun.*"],
     PrintAssembly: false,
-    IGVPrintLevel: 2
+    MaxNodeLimit: 80001
   },
   {
     match: ["baz.*","frob.*"],

--- a/hotspot/test/testlibrary/jdk/test/lib/Utils.java	Tue Nov 17 16:40:52 2015 -0500
+++ b/hotspot/test/testlibrary/jdk/test/lib/Utils.java	Tue Nov 24 10:30:23 2015 +0100
@@ -44,6 +44,7 @@
 import java.util.Map;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Objects;
 import java.util.Random;
 import java.util.function.BooleanSupplier;
 import java.util.concurrent.TimeUnit;
@@ -82,6 +83,16 @@
      */
     public static final String TEST_SRC = System.getProperty("test.src", ".").trim();
 
+    /*
+     * Returns the value of 'test.jdk' system property
+     */
+    public static final String TEST_JDK = System.getProperty("test.jdk");
+
+    /**
+     * Returns the value of 'test.classes' system property
+     */
+    public static final String TEST_CLASSES = System.getProperty("test.classes", ".");
+
     private static Unsafe unsafe = null;
 
     /**
@@ -616,5 +627,18 @@
         NULL_VALUES.put(float.class, 0.0f);
         NULL_VALUES.put(double.class, 0.0d);
     }
+
+    /**
+     * Returns mandatory property value
+     * @param propName is a name of property to request
+     * @return a String with requested property value
+     */
+    public static String getMandatoryProperty(String propName) {
+        Objects.requireNonNull(propName, "Requested null property");
+        String prop = System.getProperty(propName);
+        Objects.requireNonNull(prop,
+                String.format("A mandatory property '%s' isn't set", propName));
+        return prop;
+    }
 }

author	neliasso
	Tue, 24 Nov 2015 10:30:23 +0100
changeset 34211	d25c2fc1e248
parent 34148	6efbc7ffd767 (current diff)
parent 34210	2dafc56da253 (diff)
child 34212	36bf267af0f8
child 34220	1ba69cb5585c

hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/c1/c1_LIRGenerator.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/opto/compile.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/opto/graphKit.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/runtime/arguments.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/runtime/globals.hpp		file \| annotate \| diff \| comparison \| revisions
hotspot/src/share/vm/runtime/vmStructs.cpp		file \| annotate \| diff \| comparison \| revisions
hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java		file \| annotate \| diff \| comparison \| revisions
hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java		file \| annotate \| diff \| comparison \| revisions