Merge
authorneliasso
Fri, 13 Nov 2015 13:31:48 +0100
changeset 34185 ee71c590a456
parent 33813 4f376e851453 (current diff)
parent 34175 5737d346238e (diff)
child 34186 69c8391e72e1
Merge
hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
hotspot/src/cpu/ppc/vm/ppc.ad
hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
hotspot/src/cpu/x86/vm/vm_version_x86.hpp
hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java
hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
hotspot/src/share/vm/ci/ciMethod.cpp
hotspot/src/share/vm/code/nmethod.cpp
hotspot/src/share/vm/code/nmethod.hpp
hotspot/src/share/vm/compiler/compileBroker.cpp
hotspot/src/share/vm/gc/cms/parNewGeneration.cpp
hotspot/src/share/vm/gc/serial/defNewGeneration.cpp
hotspot/src/share/vm/interpreter/interpreter.cpp
hotspot/src/share/vm/jvmci/jvmciCompiler.cpp
hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp
hotspot/src/share/vm/jvmci/jvmciCompilerToVM.hpp
hotspot/src/share/vm/jvmci/jvmciEnv.cpp
hotspot/src/share/vm/jvmci/jvmciEnv.hpp
hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp
hotspot/src/share/vm/jvmci/jvmciRuntime.cpp
hotspot/src/share/vm/oops/method.cpp
hotspot/src/share/vm/oops/method.hpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/memnode.hpp
hotspot/src/share/vm/opto/parse1.cpp
hotspot/src/share/vm/opto/split_if.cpp
hotspot/src/share/vm/opto/superword.cpp
hotspot/src/share/vm/prims/jni.cpp
hotspot/src/share/vm/runtime/arguments.cpp
hotspot/src/share/vm/runtime/deoptimization.cpp
hotspot/src/share/vm/runtime/deoptimization.hpp
hotspot/src/share/vm/runtime/globals.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/vmStructs.cpp
hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java
hotspot/test/compiler/floatingpoint/TestPow2.java
hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java
hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java
hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java
hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java
hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java
hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java
hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java
hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java
hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java
hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java
hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java
hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java
hotspot/test/compiler/tiered/LevelTransitionTest.java
hotspot/test/compiler/tiered/NonTieredLevelsTest.java
hotspot/test/compiler/tiered/TieredLevelsTest.java
hotspot/test/compiler/whitebox/ClearMethodStateTest.java
hotspot/test/compiler/whitebox/DeoptimizeAllTest.java
hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java
hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java
hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java
hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java
hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java
hotspot/test/compiler/whitebox/GetNMethodTest.java
hotspot/test/compiler/whitebox/IsMethodCompilableTest.java
hotspot/test/compiler/whitebox/LockCompilationTest.java
hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java
hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java
hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -1079,10 +1079,10 @@
   // and for a volatile write we need
   //
   //   stlr<x>
-  // 
+  //
   // Alternatively, we can implement them by pairing a normal
   // load/store with a memory barrier. For a volatile read we need
-  // 
+  //
   //   ldr<x>
   //   dmb ishld
   //
@@ -1240,7 +1240,7 @@
   // Alternatively, we can elide generation of the dmb instructions
   // and plant the alternative CompareAndSwap macro-instruction
   // sequence (which uses ldaxr<x>).
-  // 
+  //
   // Of course, the above only applies when we see these signature
   // configurations. We still want to plant dmb instructions in any
   // other cases where we may see a MemBarAcquire, MemBarRelease or
@@ -1367,7 +1367,7 @@
     opcode = parent->Opcode();
     return opcode == Op_MemBarRelease;
   }
- 
+
   // 2) card mark detection helper
 
   // helper predicate which can be used to detect a volatile membar
@@ -1383,7 +1383,7 @@
   // true
   //
   // iii) the node's Mem projection feeds a StoreCM node.
-  
+
   bool is_card_mark_membar(const MemBarNode *barrier)
   {
     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
@@ -1402,7 +1402,7 @@
 	return true;
       }
     }
-  
+
     return false;
   }
 
@@ -1430,7 +1430,7 @@
   // where
   //  || and \\ represent Ctl and Mem feeds via Proj nodes
   //  | \ and / indicate further routing of the Ctl and Mem feeds
-  // 
+  //
   // this is the graph we see for non-object stores. however, for a
   // volatile Object store (StoreN/P) we may see other nodes below the
   // leading membar because of the need for a GC pre- or post-write
@@ -1592,7 +1592,7 @@
   // ordering but neither will a releasing store (stlr). The latter
   // guarantees that the object put is visible but does not guarantee
   // that writes by other threads have also been observed.
-  // 
+  //
   // So, returning to the task of translating the object put and the
   // leading/trailing membar nodes: what do the non-normal node graph
   // look like for these 2 special cases? and how can we determine the
@@ -1731,7 +1731,7 @@
   //       |         |         |            |
   //     C |       M |       M |          M |
   //        \        |         |           /
-  //                  . . . 
+  //                  . . .
   //          (post write subtree elided)
   //                    . . .
   //             C \         M /
@@ -1812,12 +1812,12 @@
   //   |    |                 |  /        /
   //   |  Region  . . .     Phi[M]  _____/
   //   |    /                 |    /
-  //   |                      |   /   
+  //   |                      |   /
   //   | . . .   . . .        |  /
   //   | /                    | /
   // Region           |  |  Phi[M]
   //   |              |  |  / Bot
-  //    \            MergeMem 
+  //    \            MergeMem
   //     \            /
   //     MemBarVolatile
   //
@@ -1858,7 +1858,7 @@
   // to a trailing barrier via a MergeMem. That feed is either direct
   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
   // memory flow (for G1).
-  // 
+  //
   // The predicates controlling generation of instructions for store
   // and barrier nodes employ a few simple helper functions (described
   // below) which identify the presence or absence of all these
@@ -2112,8 +2112,8 @@
       x = x->in(MemNode::Memory);
     } else {
       // the merge should get its Bottom mem feed from the leading membar
-      x = mm->in(Compile::AliasIdxBot);      
-    } 
+      x = mm->in(Compile::AliasIdxBot);
+    }
 
     // ensure this is a non control projection
     if (!x->is_Proj() || x->is_CFG()) {
@@ -2190,12 +2190,12 @@
   //     . . .
   //       |
   //   MemBarVolatile (card mark)
-  //      |          |     
+  //      |          |
   //      |        StoreCM
   //      |          |
   //      |        . . .
-  //  Bot |  / 
-  //   MergeMem 
+  //  Bot |  /
+  //   MergeMem
   //      |
   //      |
   //    MemBarVolatile {trailing}
@@ -2203,10 +2203,10 @@
   // 2)
   //   MemBarRelease/CPUOrder (leading)
   //    |
-  //    | 
+  //    |
   //    |\       . . .
-  //    | \        | 
-  //    |  \  MemBarVolatile (card mark) 
+  //    | \        |
+  //    |  \  MemBarVolatile (card mark)
   //    |   \   |     |
   //     \   \  |   StoreCM    . . .
   //      \   \ |
@@ -2231,7 +2231,7 @@
   //    |  \   \  |   StoreCM    . . .
   //    |   \   \ |
   //     \   \  Phi
-  //      \   \ /  
+  //      \   \ /
   //       \  Phi
   //        \ /
   //        Phi  . . .
@@ -2506,7 +2506,7 @@
 
     return (x->is_Load() && x->as_Load()->is_acquire());
   }
-  
+
   // now check for an unsafe volatile get
 
   // need to check for
@@ -2644,7 +2644,7 @@
   }
 
   membar = child_membar(membar);
-  
+
   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
     return false;
   }
@@ -2703,7 +2703,7 @@
 
   // first we check if this is part of a card mark. if so then we have
   // to generate a StoreLoad barrier
-  
+
   if (is_card_mark_membar(mbvol)) {
       return false;
   }
@@ -2769,7 +2769,7 @@
   if (!is_card_mark_membar(mbvol)) {
     return true;
   }
-  
+
   // we found a card mark -- just make sure we have a trailing barrier
 
   return (card_mark_to_trailing(mbvol) != NULL);
@@ -2808,7 +2808,7 @@
 
   assert(barrier->Opcode() == Op_MemBarCPUOrder,
 	 "CAS not fed by cpuorder membar!");
-      
+
   MemBarNode *b = parent_membar(barrier);
   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
 	  "CAS not fed by cpuorder+release membar pair!");
@@ -3463,6 +3463,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -4663,7 +4674,7 @@
       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -4671,7 +4682,7 @@
       // Emit stub for static call
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -4681,7 +4692,7 @@
     MacroAssembler _masm(&cbuf);
     address call = __ ic_call((address)$meth$$method);
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
   %}
@@ -4706,7 +4717,7 @@
     if (cb) {
       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
       if (call == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     } else {
--- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -73,6 +73,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_aarch64.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }
 
--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -2384,6 +2384,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, rthread);
+  __ mov(c_rarg1, rcpool);
   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
@@ -2397,6 +2398,7 @@
   // Load UnrollBlock* into rdi
   __ mov(r5, r0);
 
+  __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    Label noException;
   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
   __ br(Assembler::NE, noException);
@@ -2609,6 +2611,7 @@
   // n.b. 2 gp args, 0 fp args, integral return type
 
   __ mov(c_rarg0, rthread);
+  __ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
   __ lea(rscratch1,
          RuntimeAddress(CAST_FROM_FN_PTR(address,
                                          Deoptimization::uncommon_trap)));
@@ -2628,6 +2631,16 @@
   // move UnrollBlock* into r4
   __ mov(r4, r0);
 
+#ifdef ASSERT
+  { Label L;
+    __ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+    __ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+    __ br(Assembler::EQ, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -61,6 +61,7 @@
 define_pd_global(bool, UseCISCSpill,                 false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 // GL:
 // Detected a problem with unscaled compressed oops and
 // narrow_oop_use_complex_address() == false.
--- a/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -2697,7 +2697,7 @@
   // Provide a debugger breakpoint in the frame manager if breakpoints
   // in osr'd methods are requested.
 #ifdef COMPILER2
-  NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
+  if (OptoBreakpointOSR) { __ illtrap(); }
 #endif
 
   // Load callee's pointer to locals array from callee's state.
--- a/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_ppc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }
 
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -2064,6 +2064,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -3416,7 +3427,7 @@
       // The stub for call to interpreter.
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -3465,7 +3476,7 @@
     // The stub for call to interpreter.
     address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
     if (stub == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -6911,7 +6922,7 @@
     n_compare->_opnds[0] = op_crx;
     n_compare->_opnds[1] = op_src;
     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
-    
+
     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
     n2->add_req(n_region, n_src, n1);
     n2->_opnds[0] = op_dst;
@@ -10588,7 +10599,7 @@
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
   // Needs matchrule, see cmpDUnordered.
-  match(Set crx (CmpF src1 src2)); 
+  match(Set crx (CmpF src1 src2));
   // no match-rule, false predicate
   predicate(false);
 
@@ -10697,13 +10708,13 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the 
-  // node right before the conditional move using it. 
+  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the
+  // node right before the conditional move using it.
   // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
   // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
   // crashed in register allocation where the flags Reg between cmpDUnoredered and a
   // conditional move was supposed to be spilled.
-  match(Set crx (CmpD src1 src2)); 
+  match(Set crx (CmpD src1 src2));
   // False predicate, shall not be matched.
   predicate(false);
 
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -2802,7 +2802,7 @@
   __ set_last_Java_frame(R1_SP, noreg);
 
   // With EscapeAnalysis turned on, this call may safepoint!
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
   address calls_return_pc = __ last_calls_return_pc();
   // Set an oopmap for the call site that describes all our saved registers.
   oop_maps->add_gc_map(calls_return_pc - start, map);
@@ -2815,6 +2815,8 @@
   // by save_volatile_registers(...).
   RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
 
+  // reload the exec mode from the UnrollBlock (it might have changed)
+  __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
   // In excp_deopt_mode, restore and clear exception oop which we
   // stored in the thread during exception entry above. The exception
   // oop will be the return value of this stub.
@@ -2945,8 +2947,9 @@
   __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
 
   __ mr(klass_index_reg, R3);
+  __ li(R5, Deoptimization::Unpack_exception);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
-                  R16_thread, klass_index_reg);
+                  R16_thread, klass_index_reg, R5);
 
   // Set an oopmap for the call site.
   oop_maps->add_gc_map(gc_map_pc - start, map);
@@ -2966,6 +2969,12 @@
 
   // stack: (caller_of_deoptee, ...).
 
+#ifdef ASSERT
+  __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
+  __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
+#endif
+
   // Allocate new interpreter frame(s) and possibly a c2i adapter
   // frame.
   push_skeleton_frames(masm, false/*deopt*/,
--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -65,6 +65,7 @@
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoScheduling,               true);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 #ifdef _LP64
 // We need to make sure that all generated code is within
--- a/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -29,7 +29,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_sparc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     return pc_offset + NativeCall::instruction_size;
   } else if (inst->is_call_reg()) {
@@ -37,12 +37,12 @@
   } else if (inst->is_sethi()) {
     return pc_offset + NativeFarCall::instruction_size;
   } else {
-    fatal("unsupported type of instruction for call site");
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
     return 0;
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -52,7 +52,7 @@
     RelocationHolder rspec = oop_Relocation::spec(oop_index);
     _instructions->relocate(pc, rspec, 1);
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
@@ -66,20 +66,20 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     NativeMovConstReg32* move = nativeMovConstReg32_at(pc);
-    narrowKlass narrowOop = record_narrow_metadata_reference(constant);
+    narrowKlass narrowOop = record_narrow_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)narrowOop);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at %p/%p", pc, narrowOop);
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
-    Metadata* reference = record_metadata_reference(constant);
+    Metadata* reference = record_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)reference);
     TRACE_jvmci_3("relocating (metaspace constant) at %p/%p", pc, reference);
   }
@@ -106,7 +106,7 @@
   }
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     NativeCall* call = nativeCall_at(pc);
@@ -117,17 +117,17 @@
     jump->set_jump_destination((address) foreign_call_destination);
     _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
   } else {
-    fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)));
+    JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
   }
   TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -156,33 +156,33 @@
       break;
     }
     default:
-      fatal("invalid _next_call_type value");
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_FAR:
       _instructions->relocate(pc, relocInfo::poll_type);
       break;
     case POLL_RETURN_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_RETURN_FAR:
       _instructions->relocate(pc, relocInfo::poll_return_type);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value");
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   // JVMCI Registers are numbered as follows:
   //   0..31: Thirty-two General Purpose registers (CPU Registers)
   //   32..63: Thirty-two single precision float registers
@@ -199,7 +199,7 @@
     } else if(jvmci_reg < 112) {
       floatRegisterNumber = 4 * (jvmci_reg - 96);
     } else {
-      fatal("Unknown jvmci register");
+      JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
     }
     return as_FloatRegister(floatRegisterNumber)->as_VMReg();
   }
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -3036,6 +3036,7 @@
 
     __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
     __ mov(G2_thread, O0);
+    __ mov(L0deopt_mode, O2);
     __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
     __ delayed()->nop();
     oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
@@ -3121,6 +3122,7 @@
   // do the call by hand so we can get the oopmap
 
   __ mov(G2_thread, L7_thread_cache);
+  __ mov(L0deopt_mode, O1);
   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
   __ delayed()->mov(G2_thread, O0);
 
@@ -3146,6 +3148,7 @@
 
   RegisterSaver::restore_result_registers(masm);
 
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), G4deopt_mode);
   Label noException;
   __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);
 
@@ -3269,7 +3272,8 @@
   __ save_frame(0);
   __ set_last_Java_frame(SP, noreg);
   __ mov(I0, O2klass_index);
-  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
+  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // exec mode
+  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index, O3);
   __ reset_last_Java_frame();
   __ mov(O0, O2UnrollBlock->after_save());
   __ restore();
@@ -3278,6 +3282,15 @@
   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
   __ restore();
 
+#ifdef ASSERT
+  { Label L;
+    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), O1);
+    __ cmp_and_br_short(O1, Deoptimization::Unpack_uncommon_trap, Assembler::equal, Assembler::pt, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Allocate new interpreter frame(s) and possible c2i adapter frame
 
   make_new_frames(masm, false);
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -1860,6 +1860,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -1905,7 +1916,7 @@
 }
 
 // Current (2013) SPARC platforms need to read original key
-// to construct decryption expanded key 
+// to construct decryption expanded key
 const bool Matcher::pass_original_key_for_aes() {
   return true;
 }
@@ -2612,7 +2623,7 @@
       if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
-      } 
+      }
     }
   %}
 
@@ -3132,10 +3143,10 @@
 // AVOID_NONE   - instruction can be placed anywhere
 // AVOID_BEFORE - instruction cannot be placed after an
 //                instruction with MachNode::AVOID_AFTER
-// AVOID_AFTER  - the next instruction cannot be the one 
+// AVOID_AFTER  - the next instruction cannot be the one
 //                with MachNode::AVOID_BEFORE
-// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at 
-//                          the same time                                
+// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
+//                          the same time
 ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
 
 ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -358,7 +358,6 @@
     FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
   }
 
-#ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
     tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
@@ -391,7 +390,6 @@
       tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
     }
   }
-#endif // PRODUCT
 }
 
 void VM_Version::print_features() {
@@ -400,7 +398,7 @@
 
 int VM_Version::determine_features() {
   if (UseV8InstrsOnly) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-V8"); }
     return generic_v8_m;
   }
 
@@ -416,12 +414,12 @@
     if (is_T_family(features)) {
       // Happy to accomodate...
     } else {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Niagara"); }
       features |= T_family_m;
     }
   } else {
     if (is_T_family(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Not-Niagara"); }
       features &= ~(T_family_m | T1_model_m);
     } else {
       // Happy to accomodate...
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -313,7 +313,7 @@
     switch (cur_tuple_type) {
     case EVEX_FV:
       if ((cur_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
@@ -394,25 +394,27 @@
   int mod_idx = 0;
   // We will test if the displacement fits the compressed format and if so
   // apply the compression to the displacment iff the result is8bit.
-  if (VM_Version::supports_evex() && _is_evex_instruction) {
-    switch (_tuple_type) {
+  if (VM_Version::supports_evex() && (_attributes != NULL) && _attributes->is_evex_instruction()) {
+    int evex_encoding = _attributes->get_evex_encoding();
+    int tuple_type = _attributes->get_tuple_type();
+    switch (tuple_type) {
     case EVEX_FV:
-      if ((_evex_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      if ((evex_encoding & VEX_W) == VEX_W) {
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
-        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
       break;
 
     case EVEX_HV:
-      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       break;
 
     case EVEX_FVM:
       break;
 
     case EVEX_T1S:
-      switch (_input_size_in_bits) {
+      switch (_attributes->get_input_size()) {
       case EVEX_8bit:
         break;
 
@@ -433,7 +435,7 @@
     case EVEX_T1F:
     case EVEX_T2:
     case EVEX_T4:
-      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
+      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
       break;
 
     case EVEX_T8:
@@ -459,8 +461,9 @@
       break;
     }
 
-    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
-      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
+    int vector_len = _attributes->get_vector_len();
+    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
       if ((disp % disp_factor) == 0) {
         int new_disp = disp / disp_factor;
         if (is8bit(new_disp)) {
@@ -591,7 +594,6 @@
       emit_data(disp, rspec, disp32_operand);
     }
   }
-  _is_evex_instruction = false;
 }
 
 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
@@ -770,7 +772,7 @@
     case 0x55: // andnps
     case 0x56: // orps
     case 0x57: // xorps
-    case 0x59: //mulpd
+    case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@@ -1234,51 +1236,53 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_int8(0xC0 | encode);
 }
@@ -1286,16 +1290,16 @@
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1303,16 +1307,16 @@
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_int8(0xC0 | encode);
 }
@@ -1320,16 +1324,16 @@
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1361,15 +1365,17 @@
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnl(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -1396,45 +1402,51 @@
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsil(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -1581,36 +1593,38 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cpuid() {
@@ -1699,100 +1713,113 @@
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE6);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1F;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1807,36 +1834,38 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::emms() {
@@ -2082,36 +2111,26 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int dst_enc = dst->encoding();
-    int src_enc = src->encoding();
-    int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
-                                       /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2125,39 +2144,36 @@
 }
 
 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x12);
   emit_int8(0xC0 | encode);
-
 }
 
 void Assembler::kmovql(KRegister dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
-                                      /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovql(KRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int dst_enc = dst->encoding();
-  int nds_enc = 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)dst, src);
 }
 
 void Assembler::kmovql(Address dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int src_enc = src->encoding();
-  int nds_enc = 0;
-  vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)src, dst);
 }
@@ -2165,8 +2181,8 @@
 void Assembler::kmovql(KRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x92);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2174,14 +2190,16 @@
 void Assembler::kmovdl(KRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x92);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovwl(KRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x92);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2205,190 +2223,174 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
 void Assembler::movdl(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqa(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 // Move Unaligned 256bit Vector
 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::vmovdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  // swap src<->dst for encoding
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
-  _tuple_type = EVEX_FVM;
-  vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  _tuple_type = EVEX_FVM;
-  // swap src<->dst for encoding
-  vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
@@ -2434,13 +2436,12 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x12);
+  emit_operand(dst, src);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -2466,13 +2467,9 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(dst, src);
 }
@@ -2480,14 +2477,9 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                VEX_OPCODE_0F, /* rex_w */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
@@ -2510,60 +2502,56 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
-  } else {
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
@@ -2655,36 +2643,38 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negl(Register dst) {
@@ -2985,28 +2975,35 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_operand(dst, src);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x00);
   emit_int8(0xC0 | encode);
   emit_int8(imm8);
@@ -3020,8 +3017,8 @@
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
-              /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_operand(dst, src);
   emit_int8(imm8);
@@ -3029,8 +3026,8 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3038,37 +3035,42 @@
 
 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
-                  false, (VM_Version::supports_avx512dq() == false));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
-                 false, (VM_Version::supports_avx512dq() == false));
+  assert(VM_Version::supports_avx(), "");
+  assert(!VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F,
-                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F, true, false);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3076,8 +3078,8 @@
 
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3085,8 +3087,8 @@
 
 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC5);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3094,8 +3096,8 @@
 
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3103,8 +3105,8 @@
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3112,8 +3114,8 @@
 
 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC4);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3121,18 +3123,18 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_HVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3140,10 +3142,10 @@
 void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  bool vector256 = true;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
@@ -3246,43 +3248,41 @@
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_operand(dst, src);
 }
 
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3291,7 +3291,10 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
@@ -3299,12 +3302,10 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
-              VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3313,9 +3314,9 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
-  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -3324,9 +3325,9 @@
 void Assembler::pslldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
-  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -3336,16 +3337,16 @@
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3353,20 +3354,18 @@
 void Assembler::vptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
-             vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3374,42 +3373,47 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::push(int32_t imm32) {
@@ -3454,16 +3458,18 @@
 
 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rdtsc() {
@@ -3622,27 +3628,28 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::std() {
@@ -3651,11 +3658,12 @@
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -3705,38 +3713,38 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -3765,7 +3773,7 @@
   emit_arith(0x85, 0xC0, dst, src);
 }
 
-void Assembler::testl(Register dst, Address  src) {
+void Assembler::testl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
   emit_int8((unsigned char)0x85);
@@ -3792,36 +3800,38 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xabort(int8_t imm8) {
@@ -3903,138 +3913,162 @@
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 //====================VECTOR ARITHMETIC=====================================
@@ -4042,414 +4076,433 @@
 // Float-point vector arithmetic
 
 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulpd(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x15);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x14);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 // Integer vector arithmetic
 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -4457,280 +4510,324 @@
 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_operand(dst, src);
 }
 
 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
-                                      /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 2, "requires some form of AVX");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
@@ -4738,29 +4835,29 @@
 // Shift packed integers left by specified number of bits.
 void Assembler::psllw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::pslld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4768,102 +4865,111 @@
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers logically right by specified number of bits.
 void Assembler::psrlw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   // Do not confuse it with psrldq SSE2 instruction which
   // shifts 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4871,89 +4977,98 @@
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers arithmetically right by specified number of bits.
 void Assembler::psraw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4961,128 +5076,157 @@
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 // logical operations packed integers
 void Assembler::pand(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
-  }
-  else {
-    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5090,45 +5234,38 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
+  emit_int8(value & 0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_operand(dst, src);
+  // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5139,15 +5276,14 @@
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
+  assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5159,17 +5295,13 @@
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5178,11 +5310,9 @@
 
 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5192,16 +5322,12 @@
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
@@ -5210,11 +5336,10 @@
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5222,34 +5347,27 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5258,11 +5376,9 @@
 
 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5272,48 +5388,33 @@
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
   emit_int8(0x01);
 }
 
-void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode;
-  if (VM_Version::supports_avx512dq()) {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-  } else {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5322,42 +5423,36 @@
   emit_int8(value & 0x3);
 }
 
-void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+  emit_int8(value & 0x1);
+}
+
+void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-             /* vex_w */ true, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_operand(src, dst);
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_avx(), "");
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from bits 127:0
@@ -5369,13 +5464,11 @@
 
 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x00 - extract from bits 127:0
@@ -5387,11 +5480,8 @@
 
 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5402,10 +5492,9 @@
 
 // duplicate 4-bytes integer data from src into 8 locations in dest
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_avx2(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5413,189 +5502,170 @@
 // duplicate 2-bytes integer data from src into 16 locations in dest
 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F_38, false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_8bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_operand(dst, src);
 }
 
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_16bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_operand(dst, src);
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_operand(dst, src);
 }
 
 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_operand(dst, src);
 }
 
 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
 }
 
 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_operand(dst, src);
 }
 
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7B);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5603,8 +5673,8 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5613,8 +5683,9 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  int vector_len = AVX_128bit;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5622,11 +5693,9 @@
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
-  if (UseAVX < 3)
-  {
-    (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-    emit_int8(0x77);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x77);
 }
 
 
@@ -6130,8 +6199,7 @@
   if (pre > 0) {
     emit_int8(simd_pre[pre]);
   }
-  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
-                          prefix_and_encode(dst_enc, src_enc);
+  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
     emit_int8(0x0F);
     int opc2 = simd_opc[opc];
@@ -6143,7 +6211,9 @@
 }
 
 
-void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
+  int vector_len = _attributes->get_vector_len();
+  bool vex_w = _attributes->is_rex_vex_w();
   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
     prefix(VEX_3bytes);
 
@@ -6167,13 +6237,13 @@
 }
 
 // This is a 4 byte encoding
-void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                            int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                            bool is_extended_context, bool is_merge_context,
-                            int vector_len, bool no_mask_reg ){
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
   // EVEX 0x62 prefix
   prefix(EVEX_4bytes);
-  _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
+  bool vex_w = _attributes->is_rex_vex_w();
+  int evex_encoding = (vex_w ? VEX_W : 0);
+  // EVEX.b is not currently used for broadcast of single element or data rounding modes
+  _attributes->set_evex_encoding(evex_encoding);
 
   // P0: byte 2, initialized to RXBR`00mm
   // instead of not'd
@@ -6195,214 +6265,127 @@
   emit_int8(byte3);
 
   // P2: byte 4 as zL'Lbv'aaa
-  int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
   // EVEX.v` for extending EVEX.vvvv or VIDX
   byte4 |= (evex_v ? 0: EVEX_V);
   // third EXEC.b for broadcast actions
-  byte4 |= (is_extended_context ? EVEX_Rb : 0);
+  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
-  byte4 |= ((vector_len) & 0x3) << 5;
+  byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
   // last is EVEX.z for zero/merge actions
-  byte4 |= (is_merge_context ? EVEX_Z : 0);
+  byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
   emit_int8(byte4);
 }
 
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
-                           VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
   bool vex_b = adr.base_needs_rex();
   bool vex_x = adr.index_needs_rex();
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      attributes->set_is_legacy_mode();
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (xreg_enc >= 16);
     bool evex_v = (nds_enc >= 16);
-    _is_evex_instruction = true;
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
-  _instruction_uses_vl = false;
-}
-
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
-                                     bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
   bool vex_x = false;
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      if ((dst_enc >= 16) | (nds_enc >= 16) | (src_enc >= 16)) {
+        // up propagate arithmetic instructions to meet RA requirements
+        attributes->set_vector_len(AVX_512bit);
+      } else {
+        attributes->set_is_legacy_mode();
+      }
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (dst_enc >= 16);
     bool evex_v = (nds_enc >= 16);
     // can use vex_x as bank extender on rm encoding
     vex_x = (src_enc >= 16);
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
 
-  _instruction_uses_vl = false;
-
   // return modrm byte components for operands
   return (((dst_enc & 7) << 3) | (src_enc & 7));
 }
 
 
 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
-                            bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                            VexOpcode opc, InstructionAttr *attributes) {
   if (UseAVX > 0) {
     int xreg_enc = xreg->encoding();
-    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
   } else {
     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
-    rex_prefix(adr, xreg, pre, opc, rex_w);
+    rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
-    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
+    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, pre, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                                 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  InstructionMark im(this);
-  vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                                 VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)(0xF & cop));
@@ -6411,7 +6394,9 @@
 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4B);
   emit_int8((unsigned char)(0xC0 | encode));
   int src2_enc = src2->encoding();
@@ -6430,7 +6415,7 @@
   leal(dst, src);
 }
 
-void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
+void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
@@ -6948,15 +6933,17 @@
 
 void Assembler::andnq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -6983,45 +6970,51 @@
 
 void Assembler::blsiq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsiq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -7095,45 +7088,44 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7316,7 +7308,8 @@
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7324,8 +7317,9 @@
 void Assembler::movdq(Register dst, XMMRegister src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7458,8 +7452,8 @@
 
 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
-                                    /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF6);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7621,8 +7615,8 @@
 
 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0xF0);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -438,6 +438,8 @@
 
 };
 
+class InstructionAttr;
+
 // 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
 // See fxsave and xsave(EVEX enabled) documentation for layout
 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
@@ -568,7 +570,8 @@
     EVEX_8bit  = 0,
     EVEX_16bit = 1,
     EVEX_32bit = 2,
-    EVEX_64bit = 3
+    EVEX_64bit = 3,
+    EVEX_NObit = 4
   };
 
   enum WhichOperand {
@@ -598,16 +601,12 @@
 
 private:
 
-  int _evex_encoding;
-  int _input_size_in_bits;
-  int _avx_vector_len;
-  int _tuple_type;
-  bool _is_evex_instruction;
   bool _legacy_mode_bw;
   bool _legacy_mode_dq;
   bool _legacy_mode_vl;
   bool _legacy_mode_vlbw;
-  bool _instruction_uses_vl;
+
+  class InstructionAttr *_attributes;
 
   // 64bit prefixes
   int prefix_and_encode(int reg_enc, bool byteinst = false);
@@ -637,181 +636,30 @@
   int  rex_prefix_and_encode(int dst_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 
-  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
-                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                  int vector_len);
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
-  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                   bool is_extended_context, bool is_merge_context,
-                   int vector_len, bool no_mask_reg );
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
+                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                   VexSimdPrefix pre, VexOpcode opc,
-                  bool vex_w, int vector_len,
-                  bool legacy_mode = false, bool no_mask_reg = false);
-
-  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
-                  VexSimdPrefix pre, int vector_len = AVX_128bit,
-                  bool no_mask_reg = false, bool legacy_mode = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                    VexSimdPrefix pre, int vector_len = AVX_128bit,
-                    bool no_mask_reg = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  }
-
-  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
+                  InstructionAttr *attributes);
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             bool vex_w, int vector_len,
-                             bool legacy_mode, bool no_mask_reg);
-
-  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-      VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-      true, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
+                             InstructionAttr *attributes);
 
-  int  vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 true, no_mask_reg);
-  }
-
-  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, int vector_len = AVX_128bit,
-                             VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
-                             bool no_mask_reg = false) {
-    int src_enc = src->encoding();
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
-                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
-                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
-
-  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
-                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
-    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
-  }
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+                   VexOpcode opc, InstructionAttr *attributes);
 
-  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-    simd_prefix(src, dst, pre, no_mask_reg);
-  }
-  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                     VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit,
-                             bool legacy_mode = false);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
 
-  // Move/convert 32-bit integer value.
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg) {
-    // It is OK to cast from Register to XMMRegister to pass argument here
-    // since only encoding is used in simd_prefix_and_encode() and number of
-    // Gen and Xmm registers are the same.
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
-  }
-  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                             bool no_mask_reg = false) {
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
-  }
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
 
-  // Move/convert 64-bit integer value.
-  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
-                               VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
-                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                               bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
-  }
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
 
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -821,27 +669,6 @@
   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
-  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      Address src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        Address src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      XMMRegister src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        XMMRegister src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-
   bool emit_compressed_disp_byte(int &disp);
 
   void emit_operand(Register reg,
@@ -986,18 +813,16 @@
   // belong in macro assembler but there is no need for both varieties to exist
 
   void init_attributes(void) {
-    _evex_encoding = 0;
-    _input_size_in_bits = 0;
-    _avx_vector_len = AVX_NoVec;
-    _tuple_type = EVEX_ETUP;
-    _is_evex_instruction = false;
     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
-    _instruction_uses_vl = false;
+    _attributes = NULL;
   }
 
+  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
+  void clear_attributes(void) { _attributes = NULL; }
+
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -2106,12 +1931,12 @@
   void vextracti128h(Address dst, XMMRegister src);
 
   // Copy low 256bit into high 256bit of ZMM registers.
-  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vextracti64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(Address dst, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, Address src);
+  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(Address dst, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, Address src, int value);
 
   // Copy targeted 128bit segments of the ZMM registers
   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
@@ -2173,4 +1998,95 @@
 
 };
 
+// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
+// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
+// are applied.
+class InstructionAttr {
+public:
+  InstructionAttr(
+    int vector_len,
+    bool rex_vex_w,
+    bool legacy_mode,
+    bool no_reg_mask,
+    bool uses_vl)
+    :
+      _avx_vector_len(vector_len),
+      _rex_vex_w(rex_vex_w),
+      _legacy_mode(legacy_mode),
+      _no_reg_mask(no_reg_mask),
+      _uses_vl(uses_vl),
+      _tuple_type(Assembler::EVEX_ETUP),
+      _input_size_in_bits(Assembler::EVEX_NObit),
+      _is_evex_instruction(false),
+      _evex_encoding(0),
+      _is_clear_context(false),
+      _is_extended_context(false),
+      _current_assembler(NULL) {
+    if (UseAVX < 3) _legacy_mode = true;
+  }
+
+  ~InstructionAttr() {
+    if (_current_assembler != NULL) {
+      _current_assembler->clear_attributes();
+    }
+    _current_assembler = NULL;
+  }
+
+private:
+  int  _avx_vector_len;
+  bool _rex_vex_w;
+  bool _legacy_mode;
+  bool _no_reg_mask;
+  bool _uses_vl;
+  int  _tuple_type;
+  int  _input_size_in_bits;
+  bool _is_evex_instruction;
+  int  _evex_encoding;
+  bool _is_clear_context;
+  bool _is_extended_context;
+
+  Assembler *_current_assembler;
+
+public:
+  // query functions for field accessors
+  int  get_vector_len(void) const { return _avx_vector_len; }
+  bool is_rex_vex_w(void) const { return _rex_vex_w; }
+  bool is_legacy_mode(void) const { return _legacy_mode; }
+  bool is_no_reg_mask(void) const { return _no_reg_mask; }
+  bool uses_vl(void) const { return _uses_vl; }
+  int  get_tuple_type(void) const { return _tuple_type; }
+  int  get_input_size(void) const { return _input_size_in_bits; }
+  int  is_evex_instruction(void) const { return _is_evex_instruction; }
+  int  get_evex_encoding(void) const { return _evex_encoding; }
+  bool is_clear_context(void) const { return _is_clear_context; }
+  bool is_extended_context(void) const { return _is_extended_context; }
+
+  // Set the vector len manually
+  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
+
+  // Set the instruction to be encoded in AVX mode
+  void set_is_legacy_mode(void) { _legacy_mode = true; }
+
+  // Set the current instuction to be encoded as an EVEX instuction
+  void set_is_evex_instruction(void) { _is_evex_instruction = true; }
+
+  // Internal encoding data used in compressed immediate offset programming
+  void set_evex_encoding(int value) { _evex_encoding = value; }
+
+  // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+  void set_is_clear_context(void) { _is_clear_context = true; }
+
+  // Map back to current asembler so that we can manage object level assocation
+  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
+
+  // Address modifiers used for compressed displacement calculation
+  void set_address_attributes(int tuple_type, int input_size_in_bits) {
+    if (VM_Version::supports_evex()) {
+      _tuple_type = tuple_type;
+      _input_size_in_bits = input_size_in_bits;
+    }
+  }
+
+};
+
 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -3711,7 +3711,7 @@
     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
                    ExternalAddress((address)float_signflip_pool));
     } else {
@@ -3722,7 +3722,7 @@
     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
                    ExternalAddress((address)double_signflip_pool));
     } else {
--- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -84,6 +84,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            true);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  true);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -58,6 +58,4 @@
       OptoReg::invalidate(i);
     }
   }
-
-  SuperWordLoopUnrollAnalysis = true;
 }
--- a/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -36,7 +36,7 @@
 #include "code/vmreg.hpp"
 #include "vmreg_x86.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
     return (pc_offset + NativeCall::instruction_size);
@@ -53,18 +53,17 @@
     return (offset);
   } else if (inst->is_call_reg()) {
     // the inlined vtable stub contains a "call register" instruction
-    assert(method != NULL, "only valid for virtual calls");
+    assert(method.not_null(), "only valid for virtual calls");
     return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
   } else if (inst->is_cond_jump()) {
     address pc = (address) (inst);
     return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
   } else {
-    fatal("unsupported type of instruction for call site");
-    return 0;
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -75,7 +74,7 @@
     _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
     TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
@@ -85,19 +84,19 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
-    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant);
+    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
-    *((Metadata**) operand) = record_metadata_reference(constant);
+    *((Metadata**) operand) = record_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
   }
 }
@@ -117,7 +116,7 @@
   TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     // NOTE: for call without a mov, the offset must fit a 32-bit immediate
@@ -139,18 +138,18 @@
     *(jint*) disp += ((address) foreign_call_destination) - old_dest;
     _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
   } else {
-    fatal("unsupported relocation for foreign call");
+    JVMCI_ERROR("unsupported relocation for foreign call");
   }
 
   TRACE_jvmci_3("relocating (foreign call)  at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -185,6 +184,7 @@
       break;
     }
     default:
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
@@ -198,7 +198,7 @@
 }
 
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR: {
       relocate_poll_near(pc);
@@ -222,13 +222,13 @@
       _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value: %d", mark);
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   if (jvmci_reg < RegisterImpl::number_of_registers) {
     return as_Register(jvmci_reg)->as_VMReg();
   } else {
@@ -236,8 +236,7 @@
     if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) {
       return as_XMMRegister(floatRegisterNumber)->as_VMReg();
     }
-    ShouldNotReachHere();
-    return NULL;
+    JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -3651,12 +3651,71 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    Assembler::movdqu(dst, as_Address(src));
+    movdqu(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    Assembler::movdqu(dst, Address(rscratch1, 0));
+    movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  }
+  else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    vmovdqu(dst, as_Address(src));
+  }
+  else {
+    lea(rscratch1, src);
+    vmovdqu(dst, Address(rscratch1, 0));
   }
 }
 
@@ -3726,6 +3785,10 @@
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 }
 
+#ifdef _LP64
+#define XSTATE_BV 0x200
+#endif
+
 void MacroAssembler::pop_CPU_state() {
   pop_FPU_state();
   pop_IU_state();
@@ -3735,27 +3798,7 @@
 #ifndef _LP64
   frstor(Address(rsp, 0));
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to map the XSAVE area
-    // for restoring registers as described via XCR0.
-    movl(rdx,VM_Version::get_xsave_header_upper_segment());
-    movl(rax,VM_Version::get_xsave_header_lower_segment());
-    xrstor(Address(rsp, 0));
-  } else {
-    fxrstor(Address(rsp, 0));
-  }
+  fxrstor(Address(rsp, 0));
 #endif
   addptr(rsp, FPUStateSizeInWords * wordSize);
 }
@@ -3773,49 +3816,13 @@
   push_FPU_state();
 }
 
-#ifdef _LP64
-#define XSTATE_BV 0x200
-#endif
-
 void MacroAssembler::push_FPU_state() {
   subptr(rsp, FPUStateSizeInWords * wordSize);
 #ifndef _LP64
   fnsave(Address(rsp, 0));
   fwait();
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // Save a copy of EAX and EDX
-    push(rax);
-    push(rdx);
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to program XSAVE area
-    // for saving the required registers as defined in XCR0.
-    int xcr0_edx = VM_Version::get_xsave_header_upper_segment();
-    int xcr0_eax = VM_Version::get_xsave_header_lower_segment();
-    movl(rdx,xcr0_edx);
-    movl(rax,xcr0_eax);
-    xsave(Address(rsp, wordSize*2));
-    // now Apply control bits and clear bytes 8..23 in the header
-    pop(rdx);
-    pop(rax);
-    movl(Address(rsp, XSTATE_BV), xcr0_eax);
-    movl(Address(rsp, XSTATE_BV+4), xcr0_edx);
-    andq(Address(rsp, XSTATE_BV+8), 0);
-    andq(Address(rsp, XSTATE_BV+16), 0);
-  } else {
-    fxsave(Address(rsp, 0));
-  }
+  fxsave(Address(rsp, 0));
 #endif // LP64
 }
 
@@ -4007,6 +4014,23 @@
   }
 }
 
+void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
+    Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
+  }
+  else {
+    Assembler::xorpd(dst, src);
+  }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
+    Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::xorps(dst, src);
+  }
+}
+
 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
   // Used in sign-bit flipping with aligned address.
   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
@@ -4050,6 +4074,682 @@
   }
 }
 
+void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (nds_enc < 16)) {
+    vandps(dst, nds, negate_field, vector_len);
+  } else if ((src_enc < 16) && (dst_enc < 16)) {
+    movss(src, nds);
+    vandps(dst, src, negate_field, vector_len);
+  } else if (src_enc < 16) {
+    movss(src, nds);
+    vandps(src, src, negate_field, vector_len);
+    movss(dst, src);
+  } else if (dst_enc < 16) {
+    movdqu(src, xmm0);
+    movss(xmm0, nds);
+    vandps(dst, xmm0, negate_field, vector_len);
+    movdqu(xmm0, src);
+  } else if (nds_enc < 16) {
+    movdqu(src, xmm0);
+    vandps(xmm0, nds, negate_field, vector_len);
+    movss(dst, xmm0);
+    movdqu(xmm0, src);
+  } else {
+    movdqu(src, xmm0);
+    movss(xmm0, nds);
+    vandps(xmm0, xmm0, negate_field, vector_len);
+    movss(dst, xmm0);
+    movdqu(xmm0, src);
+  }
+}
+
+void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (nds_enc < 16)) {
+    vandpd(dst, nds, negate_field, vector_len);
+  } else if ((src_enc < 16) && (dst_enc < 16)) {
+    movsd(src, nds);
+    vandpd(dst, src, negate_field, vector_len);
+  } else if (src_enc < 16) {
+    movsd(src, nds);
+    vandpd(src, src, negate_field, vector_len);
+    movsd(dst, src);
+  } else if (dst_enc < 16) {
+    movdqu(src, xmm0);
+    movsd(xmm0, nds);
+    vandpd(dst, xmm0, negate_field, vector_len);
+    movdqu(xmm0, src);
+  } else if (nds_enc < 16) {
+    movdqu(src, xmm0);
+    vandpd(xmm0, nds, negate_field, vector_len);
+    movsd(dst, xmm0);
+    movdqu(xmm0, src);
+  } else {
+    movdqu(src, xmm0);
+    movsd(xmm0, nds);
+    vandpd(xmm0, xmm0, negate_field, vector_len);
+    movsd(dst, xmm0);
+    movdqu(xmm0, src);
+  }
+}
+
+void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddb(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpaddb(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpaddb(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpaddb(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddb(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpaddb(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddb(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpaddw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpaddw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpaddw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpaddw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpaddw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpaddw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubb(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpsubb(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpsubb(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpsubb(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubb(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsubb(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubb(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpsubw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpsubw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpsubw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsubw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsubw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpmullw(dst, nds, src, vector_len);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::vpmullw(dst, dst, src, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for src
+    evmovdqul(nds, src, Assembler::AVX_512bit);
+    Assembler::vpmullw(dst, dst, nds, vector_len);
+  } else if ((src_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch for dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds as scatch for xmm0 to hold src
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::vpmullw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, src, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpmullw(dst, nds, src, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpmullw(dst, dst, src, vector_len);
+  } else if (nds_enc < 16) {
+    // implies dst_enc in upper bank with src as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(nds, nds, src, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs in upper bank
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpmullw(xmm0, xmm0, src, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsraw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsraw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsraw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsraw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsrlw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsrlw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsrlw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsrlw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  int shift_enc = shift->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsllw(dst, nds, shift, vector_len);
+  } else if ((dst_enc < 16) && (shift_enc < 16)) {
+    Assembler::vpsllw(dst, dst, shift, vector_len);
+  } else if ((dst_enc < 16) && (nds_enc < 16)) {
+    // use nds_enc as scratch with shift
+    evmovdqul(nds, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(dst, dst, nds, vector_len);
+  } else if ((shift_enc < 16) && (nds_enc < 16)) {
+    // use nds as scratch with dst
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else if (dst_enc < 16) {
+    // use nds to save a copy of xmm0 and hold shift
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(dst, dst, xmm0, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  } else if (nds_enc < 16) {
+    // use nds as dest as temps
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, xmm0, vector_len);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // worse case scenario, all regs are in the upper bank
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
+    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::vpsllw(dst, nds, shift, vector_len);
+  } else if (dst_enc < 16) {
+    Assembler::vpsllw(dst, dst, shift, vector_len);
+  } else if (nds_enc < 16) {
+    // use nds as scratch
+    evmovdqul(nds, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(nds, nds, shift, vector_len);
+    evmovdqul(dst, nds, Assembler::AVX_512bit);
+  } else {
+    // use nds as scratch for xmm0
+    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+  }
+}
+
+// This instruction exists within macros, ergo we cannot control its input
+// when emitted through those patterns.
+void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
+  if (VM_Version::supports_avx512nobw()) {
+    int dst_enc = dst->encoding();
+    int src_enc = src->encoding();
+    if (dst_enc == src_enc) {
+      if (dst_enc < 16) {
+        Assembler::punpcklbw(dst, src);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, xmm0);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    } else {
+      if ((src_enc < 16) && (dst_enc < 16)) {
+        Assembler::punpcklbw(dst, src);
+      } else if (src_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, src);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else if (dst_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, src, Assembler::AVX_512bit);
+        Assembler::punpcklbw(dst, xmm0);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        evmovdqul(xmm1, src, Assembler::AVX_512bit);
+        Assembler::punpcklbw(xmm0, xmm1);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    }
+  } else {
+    Assembler::punpcklbw(dst, src);
+  }
+}
+
+// This instruction exists within macros, ergo we cannot control its input
+// when emitted through those patterns.
+void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
+  if (VM_Version::supports_avx512nobw()) {
+    int dst_enc = dst->encoding();
+    int src_enc = src->encoding();
+    if (dst_enc == src_enc) {
+      if (dst_enc < 16) {
+        Assembler::pshuflw(dst, src, mode);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, xmm0, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    } else {
+      if ((src_enc < 16) && (dst_enc < 16)) {
+        Assembler::pshuflw(dst, src, mode);
+      } else if (src_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, src, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else if (dst_enc < 16) {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm0, src, Assembler::AVX_512bit);
+        Assembler::pshuflw(dst, xmm0, mode);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      } else {
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        subptr(rsp, 64);
+        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+        evmovdqul(xmm1, src, Assembler::AVX_512bit);
+        Assembler::pshuflw(xmm0, xmm1, mode);
+        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+        addptr(rsp, 64);
+      }
+    }
+  } else {
+    Assembler::pshuflw(dst, src, mode);
+  }
+}
+
 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
     vandpd(dst, nds, as_Address(src), vector_len);
@@ -4133,31 +4833,16 @@
       subptr(rsp, 64);
       evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       movflt(xmm0, nds);
-      if (reachable(src)) {
-        vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
+      vxorps(xmm0, xmm0, src, Assembler::AVX_128bit);
       movflt(dst, xmm0);
       evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       addptr(rsp, 64);
     } else {
       movflt(dst, nds);
-      if (reachable(src)) {
-        vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
-    }
-  } else {
-    if (reachable(src)) {
-      vxorps(dst, nds, as_Address(src), Assembler::AVX_128bit);
-    } else {
-      lea(rscratch1, src);
-      vxorps(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
-    }
+      vxorps(dst, dst, src, Assembler::AVX_128bit);
+    }
+  } else {
+    vxorps(dst, nds, src, Assembler::AVX_128bit);
   }
 }
 
@@ -4172,31 +4857,16 @@
       subptr(rsp, 64);
       evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       movdbl(xmm0, nds);
-      if (reachable(src)) {
-        vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
+      vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit);
       movdbl(dst, xmm0);
       evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       addptr(rsp, 64);
     } else {
       movdbl(dst, nds);
-      if (reachable(src)) {
-        vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
-      } else {
-        lea(rscratch1, src);
-        vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
-      }
-    }
-  } else {
-    if (reachable(src)) {
-      vxorpd(dst, nds, as_Address(src), Assembler::AVX_128bit);
-    } else {
-      lea(rscratch1, src);
-      vxorpd(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
-    }
+      vxorpd(dst, dst, src, Assembler::AVX_128bit);
+    }
+  } else {
+    vxorpd(dst, nds, src, Assembler::AVX_128bit);
   }
 }
 
@@ -4688,7 +5358,6 @@
   pusha();
 
   // if we are coming from c1, xmm registers may be live
-  int off = 0;
   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
   if (UseAVX > 2) {
     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
@@ -4697,7 +5366,7 @@
   if (UseSSE == 1)  {
     subptr(rsp, sizeof(jdouble)*8);
     for (int n = 0; n < 8; n++) {
-      movflt(Address(rsp, off++*sizeof(jdouble)), as_XMMRegister(n));
+      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
     }
   } else if (UseSSE >= 2)  {
     if (UseAVX > 2) {
@@ -4709,37 +5378,35 @@
 #ifdef COMPILER2
     if (MaxVectorSize > 16) {
       if(UseAVX > 2) {
-        // Save upper half of ZMM registes
+        // Save upper half of ZMM registers
         subptr(rsp, 32*num_xmm_regs);
         for (int n = 0; n < num_xmm_regs; n++) {
-          vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
+          vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
         }
-        off = 0;
       }
       assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
-      // Save upper half of YMM registes
+      // Save upper half of YMM registers
       subptr(rsp, 16*num_xmm_regs);
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+        vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
       }
     }
 #endif
     // Save whole 128bit (16 bytes) XMM registers
     subptr(rsp, 16*num_xmm_regs);
-    off = 0;
 #ifdef _LP64
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf32x4h(Address(rsp, off++*16), as_XMMRegister(n), 0);
+        vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
       }
     } else {
       for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(Address(rsp, off++*16), as_XMMRegister(n));
+        movdqu(Address(rsp, n*16), as_XMMRegister(n));
       }
     }
 #else
     for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(Address(rsp, off++*16), as_XMMRegister(n));
+      movdqu(Address(rsp, n*16), as_XMMRegister(n));
     }
 #endif
   }
@@ -4808,44 +5475,40 @@
     addptr(rsp, sizeof(jdouble)*nb_args);
   }
 
-  off = 0;
   if (UseSSE == 1)  {
     for (int n = 0; n < 8; n++) {
-      movflt(as_XMMRegister(n), Address(rsp, off++*sizeof(jdouble)));
+      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
     }
     addptr(rsp, sizeof(jdouble)*8);
   } else if (UseSSE >= 2)  {
-    // Restore whole 128bit (16 bytes) XMM regiters
+    // Restore whole 128bit (16 bytes) XMM registers
 #ifdef _LP64
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf32x4h(as_XMMRegister(n), Address(rsp, off++*16), 0);
-      }
-    }
-    else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(as_XMMRegister(n), Address(rsp, off++*16));
-      }
-    }
+  if (VM_Version::supports_evex()) {
+    for (int n = 0; n < num_xmm_regs; n++) {
+      vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
+    }
+  } else {
+    for (int n = 0; n < num_xmm_regs; n++) {
+      movdqu(as_XMMRegister(n), Address(rsp, n*16));
+    }
+  }
 #else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(as_XMMRegister(n), Address(rsp, off++ * 16));
-    }
+  for (int n = 0; n < num_xmm_regs; n++) {
+    movdqu(as_XMMRegister(n), Address(rsp, n*16));
+  }
 #endif
     addptr(rsp, 16*num_xmm_regs);
 
 #ifdef COMPILER2
     if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registes.
-      off = 0;
+      // Restore upper half of YMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
+        vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
       }
       addptr(rsp, 16*num_xmm_regs);
       if(UseAVX > 2) {
-        off = 0;
         for (int n = 0; n < num_xmm_regs; n++) {
-          vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
+          vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
         }
         addptr(rsp, 32*num_xmm_regs);
       }
@@ -6831,7 +7494,7 @@
 
     bind(SCAN_TO_16_CHAR_LOOP);
     vmovdqu(vec3, Address(result, 0));
-    vpcmpeqw(vec3, vec3, vec1, true);
+    vpcmpeqw(vec3, vec3, vec1, 1);
     vptest(vec2, vec3);
     jcc(Assembler::carryClear, FOUND_CHAR);
     addptr(result, 32);
@@ -7672,7 +8335,7 @@
         BIND(L_check_fill_32_bytes);
         addl(count, 8 << shift);
         jccb(Assembler::less, L_check_fill_8_bytes);
-        evmovdqul(Address(to, 0), xtmp, Assembler::AVX_256bit);
+        vmovdqu(Address(to, 0), xtmp);
         addptr(to, 32);
         subl(count, 8 << shift);
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -962,10 +962,15 @@
   void divss(XMMRegister dst, AddressLiteral src);
 
   // Move Unaligned Double Quadword
-  void movdqu(Address     dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, Address src)       { Assembler::movdqu(dst, src); }
-  void movdqu(XMMRegister dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(Address     dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
   void movdqu(XMMRegister dst, AddressLiteral src);
+  // AVX Unaligned forms
+  void vmovdqu(Address     dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, AddressLiteral src);
 
   // Move Aligned Double Quadword
   void movdqa(XMMRegister dst, Address src)       { Assembler::movdqa(dst, src); }
@@ -1024,12 +1029,12 @@
   void ucomisd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
+  void xorpd(XMMRegister dst, XMMRegister src);
   void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
   void xorpd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
+  void xorps(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
@@ -1047,6 +1052,39 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+  void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
+
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+
+  void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
+  void punpcklbw(XMMRegister dst, XMMRegister src);
+  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
+
+  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
+  void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
+
   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len)     { Assembler::vandpd(dst, nds, src, vector_len); }
   void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -192,31 +192,22 @@
     }
   } else if(UseSSE >= 2) {
     // Save whole 128bit (16 bytes) XMM regiters
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
-        off += delta;
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
-        off += delta;
-      }
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
+      off += delta;
     }
   }
 
   if (vect_words > 0) {
     assert(vect_words*wordSize == 128, "");
     __ subptr(rsp, 128); // Save upper half of YMM registes
-    off = 0;
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+      __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
       __ subptr(rsp, 256); // Save upper half of ZMM registes
-      off = 0;
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
+        __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
       }
     }
   }
@@ -285,31 +276,23 @@
       off += delta;
     }
   } else if (UseSSE >= 2) {
-    if (VM_Version::supports_avx512novl()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
-        off += delta;
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
-        off += delta;
-      }
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
+      off += delta;
     }
   }
   if (restore_vectors) {
+    assert(additional_frame_bytes == 128, "");
     if (UseAVX > 2) {
-      off = 0;
+      // Restore upper half of ZMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
       }
       __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
     }
     // Restore upper half of YMM registes.
-    assert(additional_frame_bytes == 128, "");
-    off = 0;
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
     }
     __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
   }
@@ -2562,7 +2545,8 @@
 
   oop_maps->add_gc_map( __ pc()-start, map);
 
-  // Discard arg to fetch_unroll_info
+  // Discard args to fetch_unroll_info
+  __ pop(rcx);
   __ pop(rcx);
 
   __ get_thread(rcx);
@@ -2575,9 +2559,8 @@
   // we are very short of registers
 
   Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
-  // retrieve the deopt kind from where we left it.
-  __ pop(rax);
-  __ movl(unpack_kind, rax);                      // save the unpack_kind value
+  // retrieve the deopt kind from the UnrollBlock.
+  __ movl(rax, unpack_kind);
 
    Label noException;
   __ cmpl(rax, Deoptimization::Unpack_exception);   // Was exception pending?
@@ -2787,11 +2770,12 @@
   enum frame_layout {
     arg0_off,      // thread                     sp + 0 // Arg location for
     arg1_off,      // unloaded_class_index       sp + 1 // calling C
+    arg2_off,      // exec_mode                  sp + 2
     // The frame sender code expects that rbp will be in the "natural" place and
     // will override any oopMap setting for it. We must therefore force the layout
     // so that it agrees with the frame sender code.
-    rbp_off,       // callee saved register      sp + 2
-    return_off,    // slot for return address    sp + 3
+    rbp_off,       // callee saved register      sp + 3
+    return_off,    // slot for return address    sp + 4
     framesize
   };
 
@@ -2823,6 +2807,7 @@
   __ movptr(Address(rsp, arg0_off*wordSize), rdx);
   // argument already in ECX
   __ movl(Address(rsp, arg1_off*wordSize),rcx);
+  __ movl(Address(rsp, arg2_off*wordSize), Deoptimization::Unpack_uncommon_trap);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
 
   // Set an oopmap for the call site
@@ -2839,6 +2824,16 @@
   // Load UnrollBlock into EDI
   __ movptr(rdi, rax);
 
+#ifdef ASSERT
+  { Label L;
+    __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()),
+            (int32_t)Deoptimization::Unpack_uncommon_trap);
+    __ jcc(Assembler::equal, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -72,45 +72,28 @@
 class RegisterSaver {
   // Capture info about frame layout.  Layout offsets are in jint
   // units because compiler frame slots are jints.
-#define HALF_ZMM_BANK_WORDS 128
+#define XSAVE_AREA_BEGIN 160
+#define XSAVE_AREA_YMM_BEGIN 576
+#define XSAVE_AREA_ZMM_BEGIN 1152
+#define XSAVE_AREA_UPPERBANK 1664
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+    xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,            // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
-    DEF_XMM_OFFS(2),
-    DEF_XMM_OFFS(3),
-    DEF_XMM_OFFS(4),
-    DEF_XMM_OFFS(5),
-    DEF_XMM_OFFS(6),
-    DEF_XMM_OFFS(7),
-    DEF_XMM_OFFS(8),
-    DEF_XMM_OFFS(9),
-    DEF_XMM_OFFS(10),
-    DEF_XMM_OFFS(11),
-    DEF_XMM_OFFS(12),
-    DEF_XMM_OFFS(13),
-    DEF_XMM_OFFS(14),
-    DEF_XMM_OFFS(15),
-    zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
+    // 2..15 are implied in range usage
+    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    DEF_YMM_OFFS(0),
+    DEF_YMM_OFFS(1),
+    // 2..15 are implied in range usage
+    zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
+    zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
     DEF_ZMM_OFFS(16),
     DEF_ZMM_OFFS(17),
-    DEF_ZMM_OFFS(18),
-    DEF_ZMM_OFFS(19),
-    DEF_ZMM_OFFS(20),
-    DEF_ZMM_OFFS(21),
-    DEF_ZMM_OFFS(22),
-    DEF_ZMM_OFFS(23),
-    DEF_ZMM_OFFS(24),
-    DEF_ZMM_OFFS(25),
-    DEF_ZMM_OFFS(26),
-    DEF_ZMM_OFFS(27),
-    DEF_ZMM_OFFS(28),
-    DEF_ZMM_OFFS(29),
-    DEF_ZMM_OFFS(30),
-    DEF_ZMM_OFFS(31),
+    // 18..31 are implied in range usage
     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
@@ -160,8 +143,6 @@
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-  int ymmhi_offset = -1;
   int off = 0;
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
   if (UseAVX < 3) {
@@ -171,24 +152,15 @@
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    // Save upper half of YMM registers
-    vect_words = 16 * num_xmm_regs / wordSize;
-    if (UseAVX < 3) {
-      ymmhi_offset = additional_frame_words;
-      additional_frame_words += vect_words;
-    }
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
 
-  // Always make the frame size 16-byte aligned
-  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
-                                     reg_save_size*BytesPerInt, num_xmm_regs);
+  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
+  int frame_size_in_bytes = round_to(reg_save_size*BytesPerInt, num_xmm_regs);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
-  // The caller will allocate additional_frame_words
-  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
   // CodeBlob frame size is in words.
   int frame_size_in_words = frame_size_in_bytes / wordSize;
   *total_frame_words = frame_size_in_words;
@@ -203,12 +175,34 @@
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
   // push cpu state handles this on EVEX enabled targets
-  if ((vect_words > 0) && (UseAVX < 3)) {
-    assert(vect_words*wordSize >= 256, "");
-    // Save upper half of YMM registes(0..num_xmm_regs)
-    __ subptr(rsp, num_xmm_regs*16);
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
+  if (save_vectors) {
+    // Save upper half of YMM registes(0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
+    }
+    if (VM_Version::supports_evex()) {
+      // Save upper half of ZMM registes(0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
+      }
+      // Save full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      int vector_len = Assembler::AVX_512bit;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Save upper bank of ZMM registers(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
+      }
     }
   }
   if (frame::arg_reg_save_area_bytes != 0) {
@@ -224,8 +218,7 @@
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
 
-#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
-#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
 
   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
@@ -257,31 +250,21 @@
     off = zmm16_off;
     delta = zmm17_off - off;
     for (int n = 16; n < num_xmm_regs; n++) {
-      XMMRegister xmm_name = as_XMMRegister(n);
-      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
+      XMMRegister zmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
       off += delta;
     }
   }
 
 #if defined(COMPILER2) || INCLUDE_JVMCI
   if (save_vectors) {
-    assert(ymmhi_offset != -1, "save area must exist");
-    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
-    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
+    off = ymm0_off;
+    int delta = ymm1_off - off;
+    for (int n = 0; n < 16; n++) {
+      XMMRegister ymm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
+      off += delta;
+    }
   }
 #endif // COMPILER2 || INCLUDE_JVMCI
 
@@ -316,8 +299,8 @@
       off = zmm16H_off;
       delta = zmm17H_off - off;
       for (int n = 16; n < num_xmm_regs; n++) {
-        XMMRegister xmm_name = as_XMMRegister(n);
-        map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
+        XMMRegister zmm_name = as_XMMRegister(n);
+        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
         off += delta;
       }
     }
@@ -335,21 +318,48 @@
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
+
 #if defined(COMPILER2) || INCLUDE_JVMCI
-  // On EVEX enabled targets everything is handled in pop fpu state
-  if ((restore_vectors) && (UseAVX < 3)) {
-    assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
-    int off = 0;
-    // Restore upper half of YMM registes (0..num_xmm_regs)
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
-    }
-    __ addptr(rsp, num_xmm_regs*16);
+  if (restore_vectors) {
+    assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
+    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
   }
 #else
-  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
+  assert(!save_vectors, "vectors are generated only by C2");
 #endif
+
+  // On EVEX enabled targets everything is handled in pop fpu state
+  if (restore_vectors) {
+    // Restore upper half of YMM registes (0..15)
+    int base_addr = XSAVE_AREA_YMM_BEGIN;
+    for (int n = 0; n < 16; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  base_addr+n*16));
+    }
+    if (VM_Version::supports_evex()) {
+      // Restore upper half of ZMM registes (0..15)
+      base_addr = XSAVE_AREA_ZMM_BEGIN;
+      for (int n = 0; n < 16; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
+      }
+      // Restore full ZMM registes(16..num_xmm_regs)
+      base_addr = XSAVE_AREA_UPPERBANK;
+      int vector_len = Assembler::AVX_512bit;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
+      }
+    }
+  } else {
+    if (VM_Version::supports_evex()) {
+      // Restore upper bank of ZMM registes(16..31) for double/float usage
+      int base_addr = XSAVE_AREA_UPPERBANK;
+      int off = 0;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
+      }
+    }
+  }
+
   // Recover CPU state
   __ pop_CPU_state();
   // Get the rbp described implicitly by the calling convention (no oopMap)
@@ -2819,6 +2829,7 @@
 
     __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
     __ mov(c_rarg0, r15_thread);
+    __ movl(c_rarg2, r14); // exec mode
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
 
@@ -2905,6 +2916,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, r15_thread);
+  __ movl(c_rarg1, r14); // exec_mode
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
 
   // Need to have an oopmap that tells fetch_unroll_info where to
@@ -2922,6 +2934,7 @@
   // Load UnrollBlock* into rdi
   __ mov(rdi, rax);
 
+  __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    Label noException;
   __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
   __ jcc(Assembler::notEqual, noException);
@@ -3140,6 +3153,7 @@
   // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
 
   __ mov(c_rarg0, r15_thread);
+  __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
 
   // Set an oopmap for the call site
@@ -3155,6 +3169,16 @@
   // Load UnrollBlock* into rdi
   __ mov(rdi, rax);
 
+#ifdef ASSERT
+  { Label L;
+    __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()),
+            (int32_t)Deoptimization::Unpack_uncommon_trap);
+    __ jcc(Assembler::equal, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -273,7 +273,7 @@
     if (UseAVX > 2) {
       last_reg = 31;
     }
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
       }
@@ -391,7 +391,7 @@
     // restore regs belonging to calling function
 #ifdef _WIN64
     // emit the restores for xmm regs
-    if (VM_Version::supports_avx512novl()) {
+    if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
         __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
       }
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -891,7 +891,7 @@
       UseNewLongLShift = true;
     }
     if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
-      if( supports_sse4a() ) {
+      if (supports_sse4a()) {
         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
       } else {
         UseXmmLoadAndClearUpper = false;
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -552,6 +552,19 @@
           break;
         }
       }
+      // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
+      if (retVal == false) {
+        // Verify that OS save/restore all bits of EVEX registers
+        // during signal processing.
+        int nreg = 2 LP64_ONLY(+2);
+        retVal = true;
+        for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
+          if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
+            retVal = false;
+            break;
+          }
+        }
+      }
     }
     return retVal;
   }
@@ -706,6 +719,9 @@
   static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
   static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
+  static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
+  static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
+  static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/hotspot/src/cpu/x86/vm/x86.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -1716,6 +1716,36 @@
   return ret_value;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  if (ret_value) {
+    switch (opcode) {
+      case Op_AddVB:
+      case Op_SubVB:
+        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
+          ret_value = false;
+        break;
+      case Op_URShiftVS:
+      case Op_RShiftVS:
+      case Op_LShiftVS:
+      case Op_MulVS:
+      case Op_AddVS:
+      case Op_SubVS:
+        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
+          ret_value = false;
+        break;
+      case Op_CMoveVD:
+        if (vlen != 4)
+          ret_value  = false;
+        break;
+    }
+  }
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   int float_pressure_threshold = default_pressure_threshold;
 #ifdef _LP64
@@ -1759,11 +1789,9 @@
     break;
   case T_BYTE:
     if (size < 4) return 0;
-    if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
     break;
   case T_SHORT:
     if (size < 4) return 0;
-    if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
     break;
   default:
     ShouldNotReachHere();
@@ -1967,27 +1995,34 @@
   bool is_single_byte = false;
   int vec_len = 0;
   if ((UseAVX > 2) && (stack_offset != 0)) {
+    int tuple_type = Assembler::EVEX_FVM;
+    int input_size = Assembler::EVEX_32bit;
     switch (ireg) {
-	case Op_VecS:
+    case Op_VecS:
+      tuple_type = Assembler::EVEX_T1S;
+      break;
     case Op_VecD:
+      tuple_type = Assembler::EVEX_T1S;
+      input_size = Assembler::EVEX_64bit;
+      break;
     case Op_VecX:
-	  break;
-	case Op_VecY:
-	  vec_len = 1;
-	  break;
+      break;
+    case Op_VecY:
+      vec_len = 1;
+      break;
     case Op_VecZ:
-	  vec_len = 2;
-	  break;
+      vec_len = 2;
+      break;
     }
-    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
+    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
   }
   int offset_size = 0;
   int size = 5;
   if (UseAVX > 2 ) {
-    if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { 
+    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
       size += 2; // Need an additional two bytes for EVEX encoding
-    } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { 
+    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
     } else {
       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
@@ -2711,7 +2746,21 @@
 %}
 
 instruct absF_reg_reg(regF dst, regF src) %{
-  predicate(UseAVX > 0);
+  predicate(VM_Version::supports_avxonly());
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (AbsF src));
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
@@ -2723,6 +2772,34 @@
   ins_pipe(pipe_slow);
 %}
 
+instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
+  predicate(VM_Version::supports_avx512novl());
+  match(Set dst (AbsF src1));
+  effect(TEMP src2);
+  ins_cost(150);
+  format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#else // _LP64
+instruct absF_reg_reg_evex(regF dst, regF src) %{
+  predicate(UseAVX > 2);
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#endif
+
 instruct absD_reg(regD dst) %{
   predicate((UseSSE>=2) && (UseAVX == 0));
   match(Set dst (AbsD dst));
@@ -2736,7 +2813,22 @@
 %}
 
 instruct absD_reg_reg(regD dst, regD src) %{
-  predicate(UseAVX > 0);
+  predicate(VM_Version::supports_avxonly());
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+#ifdef _LP64
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (AbsD src));
   ins_cost(150);
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
@@ -2749,6 +2841,35 @@
   ins_pipe(pipe_slow);
 %}
 
+instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
+  predicate(VM_Version::supports_avx512novl());
+  match(Set dst (AbsD src1));
+  effect(TEMP src2);
+  ins_cost(150);
+  format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#else // _LP64
+instruct absD_reg_reg_evex(regD dst, regD src) %{
+  predicate(UseAVX > 2);
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()), vector_len);
+  %}
+  ins_pipe(pipe_slow);
+%}
+#endif
+
 instruct negF_reg(regF dst) %{
   predicate((UseSSE>=1) && (UseAVX == 0));
   match(Set dst (NegF dst));
@@ -4554,7 +4675,7 @@
 %}
 
 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
@@ -4594,37 +4715,37 @@
 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
   predicate(UseSSE > 2 && UseAVX == 0);
   match(Set dst (AddReductionVI src1 src2));
-  effect(TEMP tmp2, TEMP tmp);
-  format %{ "movdqu  $tmp2,$src2\n\t"
-            "phaddd  $tmp2,$tmp2\n\t"
-            "phaddd  $tmp2,$tmp2\n\t"
-            "movd    $tmp,$src1\n\t"
-            "paddd   $tmp,$tmp2\n\t"
-            "movd    $dst,$tmp\t! add reduction4I" %}
-  ins_encode %{
-    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
-    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
-    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdl($tmp$$XMMRegister, $src1$$Register);
-    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdl($dst$$Register, $tmp$$XMMRegister);
+  effect(TEMP tmp, TEMP tmp2);
+  format %{ "movdqu  $tmp,$src2\n\t"
+            "phaddd  $tmp,$tmp\n\t"
+            "phaddd  $tmp,$tmp\n\t"
+            "movd    $tmp2,$src1\n\t"
+            "paddd   $tmp2,$tmp\n\t"
+            "movd    $dst,$tmp2\t! add reduction4I" %}
+  ins_encode %{
+    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
+    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
+    __ movdl($tmp2$$XMMRegister, $src1$$Register);
+    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ movdl($dst$$Register, $tmp2$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
-            "vphaddd  $tmp,$tmp,$tmp2\n\t"
+            "vphaddd  $tmp,$tmp,$tmp\n\t"
             "movd     $tmp2,$src1\n\t"
             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
             "movd     $dst,$tmp2\t! add reduction4I" %}
   ins_encode %{
     int vector_len = 0;
     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
-    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
     __ movdl($tmp2$$XMMRegister, $src1$$Register);
     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
     __ movdl($dst$$Register, $tmp2$$XMMRegister);
@@ -4657,7 +4778,7 @@
 %}
 
 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0 && UseAVX < 3);
+  predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
@@ -4712,7 +4833,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2\n\t"
+  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
             "vpaddd  $tmp3,$tmp3,$src2\n\t"
             "vextracti128   $tmp,$tmp3\n\t"
             "vpaddd  $tmp,$tmp,$tmp3\n\t"
@@ -4724,7 +4845,7 @@
             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
             "movd    $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
@@ -4763,7 +4884,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
+  format %{ "vextracti128  $tmp,$src2\n\t"
             "vpaddq  $tmp2,$tmp,$src2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4771,7 +4892,7 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction4L" %}
   ins_encode %{
-    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4786,7 +4907,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2\n\t"
+  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
             "vpaddq  $tmp2,$tmp2,$src2\n\t"
             "vextracti128   $tmp,$tmp2\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4796,7 +4917,7 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4810,290 +4931,280 @@
 %}
 #endif
 
-instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\t! add reduction2F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp2, TEMP tmp);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "addss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! add reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "addss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "addss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "addss   $dst,$tmp\t! add reduction4F" %}
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vaddss  $dst,dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vaddss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
-  ins_encode %{
-    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vaddss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vaddss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (AddReductionVD src1 src2));
+  match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "addsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
+  format %{ "addsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
             "addsd   $dst,$tmp\t! add reduction2D" %}
   ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (AddReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (AddReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
-  ins_encode %{
-    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vaddsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -5216,7 +5327,7 @@
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2\n\t"
+  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
             "vpmulld  $tmp3,$tmp3,$src2\n\t"
             "vextracti128   $tmp,$tmp3\n\t"
             "vpmulld  $tmp,$tmp,$src2\n\t"
@@ -5228,7 +5339,7 @@
             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
             "movd     $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
@@ -5267,7 +5378,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
+  format %{ "vextracti128  $tmp,$src2\n\t"
             "vpmullq  $tmp2,$tmp,$src2\n\t"
             "pshufd   $tmp,$tmp2,0xE\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5275,7 +5386,7 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction4L" %}
   ins_encode %{
-    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5290,7 +5401,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2\n\t"
+  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
             "vpmullq  $tmp2,$tmp2,$src2\n\t"
             "vextracti128   $tmp,$tmp2\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5300,7 +5411,7 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5314,290 +5425,280 @@
 %}
 #endif
 
-instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction2F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\t! mul reduction2F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulss   $tmp,$src2\n\t"
-            "pshufd  $tmp2,$src2,0x01\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x02\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "pshufd  $tmp2,$src2,0x03\n\t"
-            "mulss   $tmp,$tmp2\n\t"
-            "movdqu  $dst,$tmp\t! mul reduction4F" %}
-  ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulss   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0x01\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x02\n\t"
+            "mulss   $dst,$tmp\n\t"
+            "pshufd  $tmp,$src2,0x03\n\t"
+            "mulss   $dst,$tmp\t! mul reduction4F" %}
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (MulReductionVF src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVF dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulss  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x1\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x2\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf32x4  $tmp3,$src2, 0x3\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0x01\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x02\n\t"
-            "vmulss  $tmp2,$tmp2,$tmp\n\t"
-            "pshufd  $tmp,$tmp3,0x03\n\t"
-            "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
-  ins_encode %{
-    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vmulss  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0x01\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x02\n\t"
+            "vmulss  $dst,$dst,$tmp\n\t"
+            "pshufd  $tmp,$tmp2,0x03\n\t"
+            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
-    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
-    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
+    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP dst);
-  format %{ "movdqu  $tmp,$src1\n\t"
-            "mulsd   $tmp,$src2\n\t"
-            "pshufd  $dst,$src2,0xE\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "mulsd   $dst,$src2\n\t"
+            "pshufd  $tmp,$src2,0xE\n\t"
             "mulsd   $dst,$tmp\t! mul reduction2D" %}
   ins_encode %{
-    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
-    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
-    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
+    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 0);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf128  $tmp3,$src2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf128  $tmp2,$src2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
   predicate(UseAVX > 2);
-  match(Set dst (MulReductionVD src1 src2));
-  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+  match(Set dst (MulReductionVD dst src2));
+  effect(TEMP tmp, TEMP dst, TEMP tmp2);
+  format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
-            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
-            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
-            "pshufd  $tmp,$tmp3,0xE\n\t"
-            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
-  ins_encode %{
-    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\n\t"
+            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vmulsd  $dst,$dst,$tmp2\n\t"
+            "pshufd  $tmp,$tmp2,0xE\n\t"
+            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
-    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
-    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
-    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
+    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
+    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -5608,7 +5709,7 @@
 
 // Bytes vector add
 instruct vadd4B(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed4B" %}
   ins_encode %{
@@ -5617,8 +5718,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
   ins_encode %{
@@ -5628,8 +5740,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
   ins_encode %{
@@ -5639,8 +5763,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd8B(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed8B" %}
   ins_encode %{
@@ -5649,8 +5796,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
   ins_encode %{
@@ -5660,8 +5818,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
   ins_encode %{
@@ -5671,8 +5841,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd16B(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
   match(Set dst (AddVB dst src));
   format %{ "paddb   $dst,$src\t! add packed16B" %}
   ins_encode %{
@@ -5681,8 +5874,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
   ins_encode %{
@@ -5692,8 +5896,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
   ins_encode %{
@@ -5703,8 +5930,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
   ins_encode %{
@@ -5714,8 +5964,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
   ins_encode %{
@@ -5725,8 +5987,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
   ins_encode %{
@@ -5737,7 +6022,7 @@
 %}
 
 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
   ins_encode %{
@@ -5749,7 +6034,7 @@
 
 // Shorts/Chars vector add
 instruct vadd2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed2S" %}
   ins_encode %{
@@ -5758,8 +6043,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
   ins_encode %{
@@ -5769,8 +6065,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
   ins_encode %{
@@ -5780,8 +6088,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed4S" %}
   ins_encode %{
@@ -5790,8 +6121,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
   ins_encode %{
@@ -5801,8 +6143,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
   ins_encode %{
@@ -5812,8 +6166,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVS dst src));
   format %{ "paddw   $dst,$src\t! add packed8S" %}
   ins_encode %{
@@ -5822,8 +6199,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
   ins_encode %{
@@ -5833,8 +6221,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
   ins_encode %{
@@ -5844,8 +6255,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
   ins_encode %{
@@ -5855,8 +6289,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
   ins_encode %{
@@ -5866,8 +6312,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
   ins_encode %{
@@ -5878,7 +6347,7 @@
 %}
 
 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
   ins_encode %{
@@ -6264,7 +6733,7 @@
 
 // Bytes vector sub
 instruct vsub4B(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed4B" %}
   ins_encode %{
@@ -6273,8 +6742,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
   ins_encode %{
@@ -6284,8 +6764,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
   ins_encode %{
@@ -6295,8 +6787,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub8B(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed8B" %}
   ins_encode %{
@@ -6305,8 +6820,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
   ins_encode %{
@@ -6316,8 +6842,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
   ins_encode %{
@@ -6327,8 +6865,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub16B(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 16);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
   match(Set dst (SubVB dst src));
   format %{ "psubb   $dst,$src\t! sub packed16B" %}
   ins_encode %{
@@ -6337,8 +6898,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
   ins_encode %{
@@ -6348,8 +6920,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
   ins_encode %{
@@ -6359,8 +6954,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
   ins_encode %{
@@ -6370,8 +6988,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
   ins_encode %{
@@ -6381,8 +7011,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
   ins_encode %{
@@ -6393,7 +7046,7 @@
 %}
 
 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (SubVB src (LoadVector mem)));
   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
   ins_encode %{
@@ -6405,7 +7058,7 @@
 
 // Shorts/Chars vector sub
 instruct vsub2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed2S" %}
   ins_encode %{
@@ -6414,8 +7067,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
   ins_encode %{
@@ -6425,8 +7089,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
   ins_encode %{
@@ -6436,8 +7112,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed4S" %}
   ins_encode %{
@@ -6446,8 +7145,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
   ins_encode %{
@@ -6457,8 +7167,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
   ins_encode %{
@@ -6468,8 +7190,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (SubVS dst src));
   format %{ "psubw   $dst,$src\t! sub packed8S" %}
   ins_encode %{
@@ -6478,8 +7223,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
   ins_encode %{
@@ -6489,8 +7245,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
   ins_encode %{
@@ -6500,8 +7279,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
   ins_encode %{
@@ -6511,8 +7313,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
   ins_encode %{
@@ -6522,8 +7336,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS dst (LoadVector mem)));
+   effect(TEMP src);
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVS src1 src2));
   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
   ins_encode %{
@@ -6534,7 +7371,7 @@
 %}
 
 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (SubVS src (LoadVector mem)));
   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
   ins_encode %{
@@ -6920,7 +7757,7 @@
 
 // Shorts/Chars vector mul
 instruct vmul2S(vecS dst, vecS src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (MulVS dst src));
   format %{ "pmullw $dst,$src\t! mul packed2S" %}
   ins_encode %{
@@ -6929,8 +7766,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
   ins_encode %{
@@ -6940,8 +7788,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
   ins_encode %{
@@ -6951,8 +7811,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul4S(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (MulVS dst src));
   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
   ins_encode %{
@@ -6961,8 +7844,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
   ins_encode %{
@@ -6972,8 +7866,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
   ins_encode %{
@@ -6983,8 +7889,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul8S(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (MulVS dst src));
   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
   ins_encode %{
@@ -6993,8 +7922,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
   ins_encode %{
@@ -7004,8 +7944,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
   ins_encode %{
@@ -7015,8 +7978,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
   ins_encode %{
@@ -7026,8 +8012,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS dst src2));
+  effect(TEMP src1);
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
   ins_encode %{
@@ -7037,8 +8035,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS dst (LoadVector mem)));
+  effect(TEMP src);
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (MulVS src1 src2));
   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
   ins_encode %{
@@ -7049,7 +8070,7 @@
 %}
 
 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (MulVS src (LoadVector mem)));
   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
   ins_encode %{
@@ -7711,7 +8732,7 @@
 
 // Shorts/Chars vector left shift
 instruct vsll2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7721,7 +8742,7 @@
 %}
 
 instruct vsll2S_imm(vecS dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7730,8 +8751,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7741,8 +8773,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
   ins_encode %{
@@ -7752,8 +8796,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7763,7 +8830,7 @@
 %}
 
 instruct vsll4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7772,8 +8839,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7783,8 +8861,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
   ins_encode %{
@@ -7794,8 +8884,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7805,7 +8918,7 @@
 %}
 
 instruct vsll8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7814,8 +8927,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7825,8 +8949,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
   ins_encode %{
@@ -7836,8 +8983,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
   ins_encode %{
@@ -7847,8 +9017,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
   ins_encode %{
@@ -7858,8 +9040,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
   ins_encode %{
@@ -7870,7 +9075,7 @@
 %}
 
 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
   ins_encode %{
@@ -8104,7 +9309,7 @@
 // unsigned values.
 
 instruct vsrl2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8114,7 +9319,7 @@
 %}
 
 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8123,8 +9328,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8134,8 +9350,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
   ins_encode %{
@@ -8145,8 +9373,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8156,7 +9407,7 @@
 %}
 
 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8165,8 +9416,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8176,8 +9438,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
   ins_encode %{
@@ -8187,8 +9461,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8198,7 +9495,7 @@
 %}
 
 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS dst shift));
   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8207,8 +9504,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8218,8 +9526,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
   ins_encode %{
@@ -8229,8 +9560,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
   ins_encode %{
@@ -8240,8 +9594,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
   ins_encode %{
@@ -8251,8 +9617,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
   ins_encode %{
@@ -8263,7 +9652,7 @@
 %}
 
 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (URShiftVS src shift));
   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
   ins_encode %{
@@ -8493,7 +9882,7 @@
 
 // Shorts/Chars vector arithmetic right shift
 instruct vsra2S(vecS dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8512,8 +9901,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8523,8 +9923,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
   ins_encode %{
@@ -8534,8 +9946,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra4S(vecD dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8545,7 +9980,7 @@
 %}
 
 instruct vsra4S_imm(vecD dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8554,8 +9989,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8565,8 +10011,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
   ins_encode %{
@@ -8576,8 +10034,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra8S(vecX dst, vecS shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8587,7 +10068,7 @@
 %}
 
 instruct vsra8S_imm(vecX dst, immI8 shift) %{
-  predicate(n->as_Vector()->length() == 8);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8596,8 +10077,19 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8607,8 +10099,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
-  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
   ins_encode %{
@@ -8618,8 +10133,31 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
   ins_encode %{
@@ -8629,8 +10167,20 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
   ins_encode %{
@@ -8640,8 +10190,31 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
+  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS dst shift));
+  effect(TEMP src);
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
   ins_encode %{
@@ -8652,7 +10225,7 @@
 %}
 
 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
-  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
   ins_encode %{
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -291,9 +291,7 @@
     size += 6; // fldcw
   }
   if (C->max_vector_size() > 16) {
-    if(UseAVX <= 2) {
-      size += 3; // vzeroupper
-    }
+    size += 3; // vzeroupper
   }
   return size;
 }
@@ -1915,7 +1913,7 @@
       if (stub == NULL) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
-      } 
+      }
     }
   %}
 
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Fri Nov 13 13:31:48 2015 +0100
@@ -536,11 +536,7 @@
 #define __ _masm.
 
 static int clear_avx_size() {
-  if(UseAVX > 2) {
-    return 0; // vzeroupper is ignored
-  } else {
-    return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
-  }
+  return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -871,7 +867,7 @@
       if (framesize > 0) {
         st->print("\n\t");
         st->print("addq    rbp, #%d", framesize);
-      }      
+      }
     }
   }
 
--- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -497,12 +497,15 @@
   //  1:  getfield
   //  2:    index
   //  3:    index
-  //  4:  ireturn/areturn
+  //  4:  ireturn/areturn/freturn/lreturn/dreturn
   // NB this is not raw bytecode: index is in machine order
   u1 *code = method->code_base();
   assert(code[0] == Bytecodes::_aload_0 &&
          code[1] == Bytecodes::_getfield &&
          (code[4] == Bytecodes::_ireturn ||
+          code[4] == Bytecodes::_freturn ||
+          code[4] == Bytecodes::_lreturn ||
+          code[4] == Bytecodes::_dreturn ||
           code[4] == Bytecodes::_areturn), "should do");
   u2 index = Bytes::get_native_u2(&code[2]);
 
--- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Fri Nov 13 13:31:48 2015 +0100
@@ -32,6 +32,7 @@
 import jdk.vm.ci.code.InstalledCode;
 import jdk.vm.ci.code.InvalidInstalledCodeException;
 import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.common.JVMCIError;
 import jdk.vm.ci.hotspotvmconfig.HotSpotVMField;
 import jdk.vm.ci.inittimer.InitTimer;
 import jdk.vm.ci.meta.JavaType;
@@ -308,6 +309,8 @@
      *         {@link HotSpotVMConfig#codeInstallResultCodeTooLarge},
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesFailed} or
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesInvalid}.
+     * @throws JVMCIError if there is something wrong with the compiled code or the associated
+     *             metadata.
      */
     native int installCode(TargetDescription target, HotSpotCompiledCode compiledCode, InstalledCode code, HotSpotSpeculationLog speculationLog);
 
--- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Fri Nov 13 13:31:48 2015 +0100
@@ -1677,6 +1677,7 @@
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_caller_adjustment", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockCallerAdjustmentOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_number_of_frames", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockNumberOfFramesOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_total_frame_sizes", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockTotalFrameSizesOffset;
+    @HotSpotVMField(name = "Deoptimization::UnrollBlock::_unpack_kind", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockUnpackKindOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_sizes", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFrameSizesOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_frame_pcs", type = "address*", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockFramePcsOffset;
     @HotSpotVMField(name = "Deoptimization::UnrollBlock::_initial_info", type = "intptr_t", get = HotSpotVMField.Type.OFFSET) @Stable public int deoptimizationUnrollBlockInitialInfoOffset;
--- a/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -66,12 +66,12 @@
   features = generic_v9_m;
 
   if (detect_niagara()) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on Niagara");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on Niagara"); }
     features = niagara1_m | T_family_m;
   }
 
   if (detect_M_family()) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on M family");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Detected Linux on M family"); }
     features = sun4v_m | generic_v9_m | M_family_m | T_family_m;
   }
 
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -707,12 +707,10 @@
     BlockBegin* block = bci2block()->at(bci);
     if (block != NULL && block == parent()->bci2block()->at(bci)) {
       BlockBegin* new_block = new BlockBegin(block->bci());
-#ifndef PRODUCT
       if (PrintInitialBlockList) {
         tty->print_cr("CFG: cloned block %d (bci %d) as block %d for jsr",
                       block->block_id(), block->bci(), new_block->block_id());
       }
-#endif
       // copy data from cloned blocked
       new_block->set_depth_first_number(block->depth_first_number());
       if (block->is_set(BlockBegin::parser_loop_header_flag)) new_block->set(BlockBegin::parser_loop_header_flag);
@@ -1438,7 +1436,9 @@
 
   bool need_mem_bar = false;
   if (method()->name() == ciSymbol::object_initializer_name() &&
-      (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields()))) {
+      (scope()->wrote_final() || (AlwaysSafeConstructors && scope()->wrote_fields())
+                              || (support_IRIW_for_not_multiple_copy_atomic_cpu && scope()->wrote_volatile())
+     )){
     need_mem_bar = true;
   }
 
@@ -1554,6 +1554,9 @@
 
   if (code == Bytecodes::_putfield) {
     scope()->set_wrote_fields();
+    if (field->is_volatile()) {
+      scope()->set_wrote_volatile();
+    }
   }
 
   const int offset = !needs_patching ? field->offset() : -1;
@@ -3785,12 +3788,10 @@
     cont = new BlockBegin(next_bci());
     // low number so that continuation gets parsed as early as possible
     cont->set_depth_first_number(0);
-#ifndef PRODUCT
     if (PrintInitialBlockList) {
       tty->print_cr("CFG: created block %d (bci %d) as continuation for inline at bci %d",
                     cont->block_id(), cont->bci(), bci());
     }
-#endif
     continuation_existed = false;
   }
   // Record number of predecessors of continuation block before
--- a/hotspot/src/share/vm/c1/c1_IR.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_IR.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -143,6 +143,7 @@
   _monitor_pairing_ok = method->has_balanced_monitors();
   _wrote_final        = false;
   _wrote_fields       = false;
+  _wrote_volatile     = false;
   _start              = NULL;
 
   if (osr_bci == -1) {
--- a/hotspot/src/share/vm/c1/c1_IR.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_IR.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -151,6 +151,7 @@
   bool          _monitor_pairing_ok;             // the monitor pairing info
   bool          _wrote_final;                    // has written final field
   bool          _wrote_fields;                   // has written fields
+  bool          _wrote_volatile;                 // has written volatile field
   BlockBegin*   _start;                          // the start block, successsors are method entries
 
   BitMap        _requires_phi_function;          // bit is set if phi functions at loop headers are necessary for a local variable
@@ -187,7 +188,8 @@
   bool          wrote_final    () const          { return _wrote_final; }
   void          set_wrote_fields()               { _wrote_fields = true; }
   bool          wrote_fields    () const         { return _wrote_fields; }
-
+  void          set_wrote_volatile()             { _wrote_volatile = true; }
+  bool          wrote_volatile    () const       { return _wrote_volatile; }
 };
 
 
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -2004,7 +2004,7 @@
 
 // LIR_Op2
 void LIR_Op2::print_instr(outputStream* out) const {
-  if (code() == lir_cmove) {
+  if (code() == lir_cmove || code() == lir_cmp) {
     print_condition(out, condition());         out->print(" ");
   }
   in_opr1()->print(out);    out->print(" ");
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1761,7 +1761,7 @@
     post_barrier(object.result(), value.result());
   }
 
-  if (is_volatile && os::is_MP()) {
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) {
     __ membar();
   }
 }
@@ -1822,6 +1822,10 @@
     address = generate_address(object.result(), x->offset(), field_type);
   }
 
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_volatile && os::is_MP()) {
+    __ membar();
+  }
+
   bool needs_atomic_access = is_volatile || AlwaysAtomicAccesses;
   if (needs_atomic_access && !needs_patching) {
     volatile_field_load(address, reg, info);
@@ -2238,6 +2242,10 @@
 
   LIR_Opr value = rlock_result(x, x->basic_type());
 
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) {
+    __ membar();
+  }
+
   get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile());
 
 #if INCLUDE_ALL_GCS
@@ -2395,7 +2403,7 @@
 
   if (x->is_volatile() && os::is_MP()) __ membar_release();
   put_Object_unsafe(src.result(), off.result(), data.result(), type, x->is_volatile());
-  if (x->is_volatile() && os::is_MP()) __ membar();
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu && x->is_volatile() && os::is_MP()) __ membar();
 }
 
 
--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -6233,9 +6233,19 @@
             if (prev_branch->stub() == NULL) {
 
               LIR_Op2* prev_cmp = NULL;
+              // There might be a cmove inserted for profiling which depends on the same
+              // compare. If we change the condition of the respective compare, we have
+              // to take care of this cmove as well.
+              LIR_Op2* prev_cmove = NULL;
 
               for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) {
                 prev_op = instructions->at(j);
+                // check for the cmove
+                if (prev_op->code() == lir_cmove) {
+                  assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2");
+                  prev_cmove = (LIR_Op2*)prev_op;
+                  assert(prev_branch->cond() == prev_cmove->condition(), "should be the same");
+                }
                 if (prev_op->code() == lir_cmp) {
                   assert(prev_op->as_Op2() != NULL, "branch must be of type LIR_Op2");
                   prev_cmp = (LIR_Op2*)prev_op;
@@ -6252,6 +6262,13 @@
                 prev_branch->negate_cond();
                 prev_cmp->set_condition(prev_branch->cond());
                 instructions->truncate(instructions->length() - 1);
+                // if we do change the condition, we have to change the cmove as well
+                if (prev_cmove != NULL) {
+                  prev_cmove->set_condition(prev_branch->cond());
+                  LIR_Opr t = prev_cmove->in_opr1();
+                  prev_cmove->set_in_opr1(prev_cmove->in_opr2());
+                  prev_cmove->set_in_opr2(t);
+                }
               }
             }
           }
--- a/hotspot/src/share/vm/ci/ciMethod.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/ci/ciMethod.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1262,6 +1262,8 @@
 bool ciMethod::is_vanilla_constructor() const {  FETCH_FLAG_FROM_VM(is_vanilla_constructor); }
 bool ciMethod::has_loops      () const {         FETCH_FLAG_FROM_VM(has_loops); }
 bool ciMethod::has_jsrs       () const {         FETCH_FLAG_FROM_VM(has_jsrs);  }
+bool ciMethod::is_getter      () const {         FETCH_FLAG_FROM_VM(is_getter); }
+bool ciMethod::is_setter      () const {         FETCH_FLAG_FROM_VM(is_setter); }
 bool ciMethod::is_accessor    () const {         FETCH_FLAG_FROM_VM(is_accessor); }
 bool ciMethod::is_initializer () const {         FETCH_FLAG_FROM_VM(is_initializer); }
 
--- a/hotspot/src/share/vm/ci/ciMethod.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/ci/ciMethod.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -311,6 +311,8 @@
   bool is_final_method() const                   { return is_final() || holder()->is_final(); }
   bool has_loops      () const;
   bool has_jsrs       () const;
+  bool is_getter      () const;
+  bool is_setter      () const;
   bool is_accessor    () const;
   bool is_initializer () const;
   bool can_be_statically_bound() const           { return _can_be_statically_bound; }
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1588,6 +1588,7 @@
   _exceptions = NULL;
   _exc_klasses = NULL;
   _successors = NULL;
+  _predecessors = new (outer->arena()) GrowableArray<Block*>(outer->arena(), 1, 0, NULL);
   _state = new (outer->arena()) StateVector(outer);
   JsrSet* new_jsrs =
     new (outer->arena()) JsrSet(outer->arena(), jsrs->size());
@@ -1771,6 +1772,12 @@
         break;
       }
     }
+
+    // Set predecessor information
+    for (int i = 0; i < _successors->length(); i++) {
+      Block* block = _successors->at(i);
+      block->predecessors()->append(this);
+    }
   }
   return _successors;
 }
@@ -1813,7 +1820,9 @@
     } else {
       klass = handler->catch_klass();
     }
-    _exceptions->append(analyzer->block_at(bci, _jsrs));
+    Block* block = analyzer->block_at(bci, _jsrs);
+    _exceptions->append(block);
+    block->predecessors()->append(this);
     _exc_klasses->append(klass);
   }
 }
@@ -1909,6 +1918,18 @@
       st->cr();
     }
   }
+  if (_predecessors == NULL) {
+    st->print_cr("  No predecessor information");
+  } else {
+    int num_predecessors = _predecessors->length();
+    st->print_cr("  Predecessors : %d", num_predecessors);
+    for (int i = 0; i < num_predecessors; i++) {
+      Block* predecessor = _predecessors->at(i);
+      st->print("    ");
+      predecessor->print_value_on(st);
+      st->cr();
+    }
+  }
   if (_exceptions == NULL) {
     st->print_cr("  No exception information");
   } else {
@@ -2270,6 +2291,9 @@
   for (SuccIter iter(tail); !iter.done(); iter.next()) {
     if (iter.succ() == head) {
       iter.set_succ(clone);
+      // Update predecessor information
+      head->predecessors()->remove(tail);
+      clone->predecessors()->append(tail);
     }
   }
   flow_block(tail, temp_vector, temp_set);
@@ -2279,6 +2303,9 @@
     for (SuccIter iter(clone); !iter.done(); iter.next()) {
       if (iter.succ() == head) {
         iter.set_succ(clone);
+        // Update predecessor information
+        head->predecessors()->remove(clone);
+        clone->predecessors()->append(clone);
         break;
       }
     }
@@ -2884,6 +2911,69 @@
 }
 
 // ------------------------------------------------------------------
+// ciTypeFlow::is_dominated_by
+//
+// Determine if the instruction at bci is dominated by the instruction at dom_bci.
+bool ciTypeFlow::is_dominated_by(int bci, int dom_bci) {
+  assert(!method()->has_jsrs(), "jsrs are not supported");
+
+  ResourceMark rm;
+  JsrSet* jsrs = new ciTypeFlow::JsrSet(NULL);
+  int        index = _methodBlocks->block_containing(bci)->index();
+  int    dom_index = _methodBlocks->block_containing(dom_bci)->index();
+  Block*     block = get_block_for(index, jsrs, ciTypeFlow::no_create);
+  Block* dom_block = get_block_for(dom_index, jsrs, ciTypeFlow::no_create);
+
+  // Start block dominates all other blocks
+  if (start_block()->rpo() == dom_block->rpo()) {
+    return true;
+  }
+
+  // Dominated[i] is true if block i is dominated by dom_block
+  int num_blocks = _methodBlocks->num_blocks();
+  bool* dominated = NEW_RESOURCE_ARRAY(bool, num_blocks);
+  for (int i = 0; i < num_blocks; ++i) {
+    dominated[i] = true;
+  }
+  dominated[start_block()->rpo()] = false;
+
+  // Iterative dominator algorithm
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    // Use reverse postorder iteration
+    for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
+      if (blk->is_start()) {
+        // Ignore start block
+        continue;
+      }
+      // The block is dominated if it is the dominating block
+      // itself or if all predecessors are dominated.
+      int index = blk->rpo();
+      bool dom = (index == dom_block->rpo());
+      if (!dom) {
+        // Check if all predecessors are dominated
+        dom = true;
+        for (int i = 0; i < blk->predecessors()->length(); ++i) {
+          Block* pred = blk->predecessors()->at(i);
+          if (!dominated[pred->rpo()]) {
+            dom = false;
+            break;
+          }
+        }
+      }
+      // Update dominator information
+      if (dominated[index] != dom) {
+        changed = true;
+        dominated[index] = dom;
+      }
+    }
+  }
+  // block dominated by dom_block?
+  return dominated[block->rpo()];
+}
+
+// ------------------------------------------------------------------
 // ciTypeFlow::record_failure()
 // The ciTypeFlow object keeps track of failure reasons separately from the ciEnv.
 // This is required because there is not a 1-1 relation between the ciEnv and
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -529,6 +529,7 @@
     GrowableArray<Block*>*           _exceptions;
     GrowableArray<ciInstanceKlass*>* _exc_klasses;
     GrowableArray<Block*>*           _successors;
+    GrowableArray<Block*>*           _predecessors;
     StateVector*                     _state;
     JsrSet*                          _jsrs;
 
@@ -617,6 +618,12 @@
       return _successors;
     }
 
+    // Predecessors of this block (including exception edges)
+    GrowableArray<Block*>* predecessors() {
+      assert(_predecessors != NULL, "must be filled in");
+      return _predecessors;
+    }
+
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -941,6 +948,9 @@
   // Perform type inference flow analysis.
   void do_flow();
 
+  // Determine if bci is dominated by dom_bci
+  bool is_dominated_by(int bci, int dom_bci);
+
   void print_on(outputStream* st) const PRODUCT_RETURN;
 
   void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
--- a/hotspot/src/share/vm/code/codeCache.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/code/codeCache.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -133,13 +133,9 @@
 
 address CodeCache::_low_bound = 0;
 address CodeCache::_high_bound = 0;
-int CodeCache::_number_of_blobs = 0;
-int CodeCache::_number_of_adapters = 0;
-int CodeCache::_number_of_nmethods = 0;
 int CodeCache::_number_of_nmethods_with_dependencies = 0;
 bool CodeCache::_needs_cache_clean = false;
 nmethod* CodeCache::_scavenge_root_nmethods = NULL;
-int CodeCache::_codemem_full_count = 0;
 
 // Initialize array of CodeHeaps
 GrowableArray<CodeHeap*>* CodeCache::_heaps = new(ResourceObj::C_HEAP, mtCode) GrowableArray<CodeHeap*> (CodeBlobType::All, true);
@@ -420,42 +416,41 @@
     }
   }
   print_trace("allocation", cb, size);
-  _number_of_blobs++;
   return cb;
 }
 
 void CodeCache::free(CodeBlob* cb) {
   assert_locked_or_safepoint(CodeCache_lock);
-
+  CodeHeap* heap = get_code_heap(cb);
   print_trace("free", cb);
   if (cb->is_nmethod()) {
-    _number_of_nmethods--;
+    heap->set_nmethod_count(heap->nmethod_count() - 1);
     if (((nmethod *)cb)->has_dependencies()) {
       _number_of_nmethods_with_dependencies--;
     }
   }
   if (cb->is_adapter_blob()) {
-    _number_of_adapters--;
+    heap->set_adapter_count(heap->adapter_count() - 1);
   }
-  _number_of_blobs--;
 
   // Get heap for given CodeBlob and deallocate
   get_code_heap(cb)->deallocate(cb);
 
-  assert(_number_of_blobs >= 0, "sanity check");
+  assert(heap->blob_count() >= 0, "sanity check");
 }
 
 void CodeCache::commit(CodeBlob* cb) {
   // this is called by nmethod::nmethod, which must already own CodeCache_lock
   assert_locked_or_safepoint(CodeCache_lock);
+  CodeHeap* heap = get_code_heap(cb);
   if (cb->is_nmethod()) {
-    _number_of_nmethods++;
+    heap->set_nmethod_count(heap->nmethod_count() + 1);
     if (((nmethod *)cb)->has_dependencies()) {
       _number_of_nmethods_with_dependencies++;
     }
   }
   if (cb->is_adapter_blob()) {
-    _number_of_adapters++;
+    heap->set_adapter_count(heap->adapter_count() + 1);
   }
 
   // flush the hardware I-cache
@@ -577,11 +572,9 @@
     assert(cur->on_scavenge_root_list(), "else shouldn't be on this list");
 
     bool is_live = (!cur->is_zombie() && !cur->is_unloaded());
-#ifndef PRODUCT
     if (TraceScavenge) {
       cur->print_on(tty, is_live ? "scavenge root" : "dead scavenge root"); tty->cr();
     }
-#endif //PRODUCT
     if (is_live) {
       // Perform cur->oops_do(f), maybe just once per nmethod.
       f->do_code_blob(cur);
@@ -774,6 +767,55 @@
   }
 }
 
+int CodeCache::blob_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->blob_count() : 0;
+}
+
+int CodeCache::blob_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->blob_count();
+  }
+  return count;
+}
+
+int CodeCache::nmethod_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->nmethod_count() : 0;
+}
+
+int CodeCache::nmethod_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->nmethod_count();
+  }
+  return count;
+}
+
+int CodeCache::adapter_count(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? heap->adapter_count() : 0;
+}
+
+int CodeCache::adapter_count() {
+  int count = 0;
+  FOR_ALL_HEAPS(heap) {
+    count += (*heap)->adapter_count();
+  }
+  return count;
+}
+
+address CodeCache::low_bound(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? (address)heap->low_boundary() : NULL;
+}
+
+address CodeCache::high_bound(int code_blob_type) {
+  CodeHeap* heap = get_code_heap(code_blob_type);
+  return (heap != NULL) ? (address)heap->high_boundary() : NULL;
+}
+
 size_t CodeCache::capacity() {
   size_t cap = 0;
   FOR_ALL_HEAPS(heap) {
@@ -863,6 +905,9 @@
     initialize_heaps();
   } else {
     // Use a single code heap
+    FLAG_SET_ERGO(uintx, NonNMethodCodeHeapSize, 0);
+    FLAG_SET_ERGO(uintx, ProfiledCodeHeapSize, 0);
+    FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, 0);
     ReservedCodeSpace rs = reserve_heap_memory(ReservedCodeCacheSize);
     add_heap(rs, "CodeCache", CodeBlobType::All);
   }
@@ -1104,9 +1149,8 @@
   CodeHeap* heap = get_code_heap(code_blob_type);
   assert(heap != NULL, "heap is null");
 
-  if (!heap->was_full() || print) {
+  if ((heap->full_count() == 0) || print) {
     // Not yet reported for this heap, report
-    heap->report_full();
     if (SegmentedCodeCache) {
       warning("%s is full. Compiler has been disabled.", get_code_heap_name(code_blob_type));
       warning("Try increasing the code heap size using -XX:%s=", get_code_heap_flag_name(code_blob_type));
@@ -1125,18 +1169,19 @@
     tty->print("%s", s.as_string());
   }
 
-  _codemem_full_count++;
+  heap->report_full();
+
   EventCodeCacheFull event;
   if (event.should_commit()) {
     event.set_codeBlobType((u1)code_blob_type);
     event.set_startAddress((u8)heap->low_boundary());
     event.set_commitedTopAddress((u8)heap->high());
     event.set_reservedTopAddress((u8)heap->high_boundary());
-    event.set_entryCount(nof_blobs());
-    event.set_methodCount(nof_nmethods());
-    event.set_adaptorCount(nof_adapters());
+    event.set_entryCount(heap->blob_count());
+    event.set_methodCount(heap->nmethod_count());
+    event.set_adaptorCount(heap->adapter_count());
     event.set_unallocatedCapacity(heap->unallocated_capacity()/K);
-    event.set_fullCount(_codemem_full_count);
+    event.set_fullCount(heap->full_count());
     event.commit();
   }
 }
@@ -1360,7 +1405,7 @@
   if (detailed) {
     st->print_cr(" total_blobs=" UINT32_FORMAT " nmethods=" UINT32_FORMAT
                        " adapters=" UINT32_FORMAT,
-                       nof_blobs(), nof_nmethods(), nof_adapters());
+                       blob_count(), nmethod_count(), adapter_count());
     st->print_cr(" compilation: %s", CompileBroker::should_compile_new_jobs() ?
                  "enabled" : Arguments::mode() == Arguments::_int ?
                  "disabled (interpreter mode)" :
@@ -1392,6 +1437,6 @@
 void CodeCache::log_state(outputStream* st) {
   st->print(" total_blobs='" UINT32_FORMAT "' nmethods='" UINT32_FORMAT "'"
             " adapters='" UINT32_FORMAT "' free_code_cache='" SIZE_FORMAT "'",
-            nof_blobs(), nof_nmethods(), nof_adapters(),
+            blob_count(), nmethod_count(), adapter_count(),
             unallocated_capacity());
 }
--- a/hotspot/src/share/vm/code/codeCache.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/code/codeCache.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -85,13 +85,9 @@
 
   static address _low_bound;                            // Lower bound of CodeHeap addresses
   static address _high_bound;                           // Upper bound of CodeHeap addresses
-  static int _number_of_blobs;                          // Total number of CodeBlobs in the cache
-  static int _number_of_adapters;                       // Total number of Adapters in the cache
-  static int _number_of_nmethods;                       // Total number of nmethods in the cache
   static int _number_of_nmethods_with_dependencies;     // Total number of nmethods with dependencies
   static bool _needs_cache_clean;                       // True if inline caches of the nmethods needs to be flushed
   static nmethod* _scavenge_root_nmethods;              // linked via nm->scavenge_root_link()
-  static int _codemem_full_count;                       // Number of times a CodeHeap in the cache was full
 
   static void mark_scavenge_root_nmethods() PRODUCT_RETURN;
   static void verify_perm_nmethods(CodeBlobClosure* f_or_null) PRODUCT_RETURN;
@@ -104,7 +100,6 @@
   static CodeHeap* get_code_heap(int code_blob_type);         // Returns the CodeHeap for the given CodeBlobType
   // Returns the name of the VM option to set the size of the corresponding CodeHeap
   static const char* get_code_heap_flag_name(int code_blob_type);
-  static bool heap_available(int code_blob_type);             // Returns true if an own CodeHeap for the given CodeBlobType is available
   static size_t heap_alignment();                             // Returns the alignment of the CodeHeaps in bytes
   static ReservedCodeSpace reserve_heap_memory(size_t size);  // Reserves one continuous chunk of memory for the CodeHeaps
 
@@ -139,9 +134,12 @@
   static CodeBlob* find_blob_unsafe(void* start);       // Same as find_blob but does not fail if looking up a zombie method
   static nmethod*  find_nmethod(void* start);           // Returns the nmethod containing the given address
 
-  static int       nof_blobs()      { return _number_of_blobs; }      // Returns the total number of CodeBlobs in the cache
-  static int       nof_adapters()   { return _number_of_adapters; }   // Returns the total number of Adapters in the cache
-  static int       nof_nmethods()   { return _number_of_nmethods; }   // Returns the total number of nmethods in the cache
+  static int       blob_count();                        // Returns the total number of CodeBlobs in the cache
+  static int       blob_count(int code_blob_type);
+  static int       adapter_count();                     // Returns the total number of Adapters in the cache
+  static int       adapter_count(int code_blob_type);
+  static int       nmethod_count();                     // Returns the total number of nmethods in the cache
+  static int       nmethod_count(int code_blob_type);
 
   // GC support
   static void gc_epilogue();
@@ -177,7 +175,9 @@
 
   // The full limits of the codeCache
   static address low_bound()                          { return _low_bound; }
+  static address low_bound(int code_blob_type);
   static address high_bound()                         { return _high_bound; }
+  static address high_bound(int code_blob_type);
 
   // Profiling
   static size_t capacity();
@@ -191,6 +191,9 @@
   static void set_needs_cache_clean(bool v)           { _needs_cache_clean = v;    }
   static void clear_inline_caches();                  // clear all inline caches
 
+  // Returns true if an own CodeHeap for the given CodeBlobType is available
+  static bool heap_available(int code_blob_type);
+
   // Returns the CodeBlobType for the given nmethod
   static int get_code_blob_type(nmethod* nm) {
     return get_code_heap(nm)->code_blob_type();
@@ -239,7 +242,10 @@
   // tells how many nmethods have dependencies
   static int number_of_nmethods_with_dependencies();
 
-  static int get_codemem_full_count() { return _codemem_full_count; }
+  static int get_codemem_full_count(int code_blob_type) {
+    CodeHeap* heap = get_code_heap(code_blob_type);
+    return (heap != NULL) ? heap->full_count() : 0;
+  }
 };
 
 
--- a/hotspot/src/share/vm/code/nmethod.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1539,7 +1539,7 @@
   if (PrintMethodFlushing) {
     tty->print_cr("*flushing nmethod %3d/" INTPTR_FORMAT ". Live blobs:" UINT32_FORMAT
                   "/Free CodeCache:" SIZE_FORMAT "Kb",
-                  _compile_id, p2i(this), CodeCache::nof_blobs(),
+                  _compile_id, p2i(this), CodeCache::blob_count(),
                   CodeCache::unallocated_capacity(CodeCache::get_code_blob_type(this))/1024);
   }
 
@@ -1819,9 +1819,7 @@
   if (_jvmci_installed_code != NULL) {
     if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
       if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
-        _jvmci_installed_code = NULL;
-        bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+        clear_jvmci_installed_code();
       }
     } else {
       if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
@@ -1922,27 +1920,6 @@
     unloading_occurred = true;
   }
 
-#if INCLUDE_JVMCI
-  // Follow JVMCI method
-  if (_jvmci_installed_code != NULL) {
-    if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
-      if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        _jvmci_installed_code = NULL;
-      }
-    } else {
-      if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
-        return false;
-      }
-    }
-  }
-
-  if (_speculation_log != NULL) {
-    if (!is_alive->do_object_b(_speculation_log)) {
-      _speculation_log = NULL;
-    }
-  }
-#endif
-
   // Exception cache
   clean_exception_cache(is_alive);
 
@@ -2006,9 +1983,7 @@
   if (_jvmci_installed_code != NULL) {
     if (_jvmci_installed_code->is_a(HotSpotNmethod::klass()) && HotSpotNmethod::isDefault(_jvmci_installed_code)) {
       if (!is_alive->do_object_b(_jvmci_installed_code)) {
-        bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
-        _jvmci_installed_code = NULL;
-        bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+        clear_jvmci_installed_code();
       }
     } else {
       if (can_unload(is_alive, (oop*)&_jvmci_installed_code, unloading_occurred)) {
@@ -2271,7 +2246,7 @@
           break;
       }
       // Mark was clear when we first saw this guy.
-      NOT_PRODUCT(if (TraceScavenge)  print_on(tty, "oops_do, mark"));
+      if (TraceScavenge) { print_on(tty, "oops_do, mark"); }
       return false;
     }
   }
@@ -2280,7 +2255,7 @@
 }
 
 void nmethod::oops_do_marking_prologue() {
-  NOT_PRODUCT(if (TraceScavenge)  tty->print_cr("[oops_do_marking_prologue"));
+  if (TraceScavenge) { tty->print_cr("[oops_do_marking_prologue"); }
   assert(_oops_do_mark_nmethods == NULL, "must not call oops_do_marking_prologue twice in a row");
   // We use cmpxchg_ptr instead of regular assignment here because the user
   // may fork a bunch of threads, and we need them all to see the same state.
@@ -2302,7 +2277,7 @@
   void* required = _oops_do_mark_nmethods;
   void* observed = Atomic::cmpxchg_ptr(NULL, &_oops_do_mark_nmethods, required);
   guarantee(observed == required, "no races in this sequential code");
-  NOT_PRODUCT(if (TraceScavenge)  tty->print_cr("oops_do_marking_epilogue]"));
+  if (TraceScavenge) { tty->print_cr("oops_do_marking_epilogue]"); }
 }
 
 class DetectScavengeRoot: public OopClosure {
@@ -3373,6 +3348,14 @@
 #endif // !PRODUCT
 
 #if INCLUDE_JVMCI
+void nmethod::clear_jvmci_installed_code() {
+  // This must be done carefully to maintain nmethod remembered sets properly
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  bs->write_ref_nmethod_pre(&_jvmci_installed_code, this);
+  _jvmci_installed_code = NULL;
+  bs->write_ref_nmethod_post(&_jvmci_installed_code, this);
+}
+
 void nmethod::maybe_invalidate_installed_code() {
   if (_jvmci_installed_code != NULL) {
      if (!is_alive()) {
@@ -3382,7 +3365,7 @@
        // might want to invalidate all existing activations.
        InstalledCode::set_address(_jvmci_installed_code, 0);
        InstalledCode::set_entryPoint(_jvmci_installed_code, 0);
-       _jvmci_installed_code = NULL;
+       clear_jvmci_installed_code();
      } else if (is_not_entrant()) {
        InstalledCode::set_entryPoint(_jvmci_installed_code, 0);
      }
--- a/hotspot/src/share/vm/code/nmethod.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -602,7 +602,7 @@
 #if INCLUDE_JVMCI
   oop jvmci_installed_code() { return _jvmci_installed_code ; }
   char* jvmci_installed_code_name(char* buf, size_t buflen);
-  void set_jvmci_installed_code(oop installed_code) { _jvmci_installed_code = installed_code;  }
+  void clear_jvmci_installed_code();
   void maybe_invalidate_installed_code();
   oop speculation_log() { return _speculation_log ; }
   void set_speculation_log(oop speculation_log) { _speculation_log = speculation_log;  }
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1716,7 +1716,8 @@
     EventCompilation event;
 
     JVMCIEnv env(task, system_dictionary_modification_counter);
-    jvmci->compile_method(target_handle, osr_bci, &env);
+    methodHandle method(thread, target_handle);
+    jvmci->compile_method(method, osr_bci, &env);
 
     post_compile(thread, task, event, task->code() != NULL, NULL);
   } else
--- a/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -67,7 +67,7 @@
     cflags(VectorizeDebug,          bool, false, VectorizeDebug) \
     cflags(CloneMapDebug,           bool, false, CloneMapDebug) \
     cflags(DoReserveCopyInSuperWordDebug, bool, false, DoReserveCopyInSuperWordDebug) \
-    NOT_PRODUCT( cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel)) \
+    cflags(IGVPrintLevel,           intx, PrintIdealGraphLevel, IGVPrintLevel) \
     cflags(MaxNodeLimit,            intx, MaxNodeLimit, MaxNodeLimit)
 #else
   #define compilerdirectives_c2_flags(cflags)
--- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1148,7 +1148,6 @@
   }
   assert(new_obj != NULL, "just checking");
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
   if (TraceScavenge) {
@@ -1156,7 +1155,6 @@
        is_in_reserved(new_obj) ? "copying" : "tenuring",
        new_obj->klass()->internal_name(), p2i(old), p2i(new_obj), new_obj->size());
   }
-#endif
 
   if (forward_ptr == NULL) {
     oop obj_to_push = new_obj;
--- a/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/cms/parOopClosures.inline.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -108,14 +108,11 @@
       if (m->is_marked()) { // Contains forwarding pointer.
         new_obj = ParNewGeneration::real_forwardee(obj);
         oopDesc::encode_store_heap_oop_not_null(p, new_obj);
-#ifndef PRODUCT
         if (TraceScavenge) {
           gclog_or_tty->print_cr("{%s %s ( " PTR_FORMAT " ) " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
              "forwarded ",
              new_obj->klass()->internal_name(), p2i(p), p2i((void *)obj), p2i((void *)new_obj), new_obj->size());
         }
-#endif
-
       } else {
         size_t obj_sz = obj->size_given_klass(objK);
         new_obj = _g->copy_to_survivor_space(_par_scan_state, obj, obj_sz, m);
--- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -430,7 +430,6 @@
     obj = obj->forwardee();
   }
 
-#ifndef PRODUCT
   if (TraceScavenge) {
     gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " (%d)}",
                            "promotion-failure",
@@ -438,7 +437,6 @@
                            p2i(obj), obj->size());
 
   }
-#endif
 
   return obj;
 }
--- a/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/parallel/psPromotionManager.inline.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -260,7 +260,6 @@
     new_obj = o->forwardee();
   }
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
   if (TraceScavenge) {
@@ -268,7 +267,6 @@
        should_scavenge(&new_obj) ? "copying" : "tenuring",
        new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size());
   }
-#endif
 
   return new_obj;
 }
@@ -285,15 +283,13 @@
         ? o->forwardee()
         : copy_to_survivor_space<promote_immediately>(o);
 
-#ifndef PRODUCT
   // This code must come after the CAS test, or it will print incorrect
   // information.
-  if (TraceScavenge &&  o->is_forwarded()) {
+  if (TraceScavenge && o->is_forwarded()) {
     gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
        "forwarding",
        new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size());
   }
-#endif
 
   oopDesc::encode_store_heap_oop_not_null(p, new_obj);
 
--- a/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/parallel/psScavenge.inline.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -138,7 +138,6 @@
     // If the klass has not been dirtied we know that there's
     // no references into  the young gen and we can skip it.
 
-#ifndef PRODUCT
     if (TraceScavenge) {
       ResourceMark rm;
       gclog_or_tty->print_cr("PSScavengeKlassClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
@@ -146,7 +145,6 @@
                              klass->external_name(),
                              klass->has_modified_oops() ? "true" : "false");
     }
-#endif
 
     if (klass->has_modified_oops()) {
       // Clean the klass since we're going to scavenge all the metadata.
--- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -134,7 +134,6 @@
 void FastScanClosure::do_oop(narrowOop* p) { FastScanClosure::do_oop_work(p); }
 
 void KlassScanClosure::do_klass(Klass* klass) {
-#ifndef PRODUCT
   if (TraceScavenge) {
     ResourceMark rm;
     gclog_or_tty->print_cr("KlassScanClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
@@ -142,7 +141,6 @@
                            klass->external_name(),
                            klass->has_modified_oops() ? "true" : "false");
   }
-#endif
 
   // If the klass has not been dirtied we know that there's
   // no references into  the young gen and we can skip it.
--- a/hotspot/src/share/vm/interpreter/interpreter.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/interpreter/interpreter.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -300,7 +300,10 @@
   }
 
   // Accessor method?
-  if (m->is_accessor()) {
+  if (m->is_getter()) {
+    // TODO: We should have used ::is_accessor above, but fast accessors in Zero expect only getters.
+    // See CppInterpreter::accessor_entry in cppInterpreter_zero.cpp. This should be fixed in Zero,
+    // then the call above updated to ::is_accessor
     assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
     return accessor;
   }
--- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -71,62 +71,97 @@
   return CompilerToVM::asMethod(hotspot_method);
 }
 
-VMReg getVMRegFromLocation(oop location, int total_frame_size) {
-  oop reg = code_Location::reg(location);
+VMReg getVMRegFromLocation(Handle location, int total_frame_size, TRAPS) {
+  if (location.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+
+  Handle reg = code_Location::reg(location);
   jint offset = code_Location::offset(location);
 
-  if (reg != NULL) {
+  if (reg.not_null()) {
     // register
     jint number = code_Register::number(reg);
-    VMReg vmReg = CodeInstaller::get_hotspot_reg(number);
-    assert(offset % 4 == 0, "must be aligned");
-    return vmReg->next(offset / 4);
+    VMReg vmReg = CodeInstaller::get_hotspot_reg(number, CHECK_NULL);
+    if (offset % 4 == 0) {
+      return vmReg->next(offset / 4);
+    } else {
+      JVMCI_ERROR_NULL("unaligned subregister offset %d in oop map", offset);
+    }
   } else {
     // stack slot
-    assert(offset % 4 == 0, "must be aligned");
-    return VMRegImpl::stack2reg(offset / 4);
+    if (offset % 4 == 0) {
+      return VMRegImpl::stack2reg(offset / 4);
+    } else {
+      JVMCI_ERROR_NULL("unaligned stack offset %d in oop map", offset);
+    }
   }
 }
 
 // creates a HotSpot oop map out of the byte arrays provided by DebugInfo
-OopMap* CodeInstaller::create_oop_map(oop debug_info) {
-  oop reference_map = DebugInfo::referenceMap(debug_info);
+OopMap* CodeInstaller::create_oop_map(Handle debug_info, TRAPS) {
+  Handle reference_map = DebugInfo::referenceMap(debug_info);
+  if (reference_map.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (!reference_map->is_a(HotSpotReferenceMap::klass())) {
+    JVMCI_ERROR_NULL("unknown reference map: %s", reference_map->klass()->signature_name());
+  }
   if (HotSpotReferenceMap::maxRegisterSize(reference_map) > 16) {
     _has_wide_vector = true;
   }
   OopMap* map = new OopMap(_total_frame_size, _parameter_count);
-  objArrayOop objects = HotSpotReferenceMap::objects(reference_map);
-  objArrayOop derivedBase = HotSpotReferenceMap::derivedBase(reference_map);
-  typeArrayOop sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map);
+  objArrayHandle objects = HotSpotReferenceMap::objects(reference_map);
+  objArrayHandle derivedBase = HotSpotReferenceMap::derivedBase(reference_map);
+  typeArrayHandle sizeInBytes = HotSpotReferenceMap::sizeInBytes(reference_map);
+  if (objects.is_null() || derivedBase.is_null() || sizeInBytes.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (objects->length() != derivedBase->length() || objects->length() != sizeInBytes->length()) {
+    JVMCI_ERROR_NULL("arrays in reference map have different sizes: %d %d %d", objects->length(), derivedBase->length(), sizeInBytes->length());
+  }
   for (int i = 0; i < objects->length(); i++) {
-    oop location = objects->obj_at(i);
-    oop baseLocation = derivedBase->obj_at(i);
+    Handle location = objects->obj_at(i);
+    Handle baseLocation = derivedBase->obj_at(i);
     int bytes = sizeInBytes->int_at(i);
 
-    VMReg vmReg = getVMRegFromLocation(location, _total_frame_size);
-    if (baseLocation != NULL) {
+    VMReg vmReg = getVMRegFromLocation(location, _total_frame_size, CHECK_NULL);
+    if (baseLocation.not_null()) {
       // derived oop
-      assert(bytes == 8, "derived oop can't be compressed");
-      VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size);
-      map->set_derived_oop(vmReg, baseReg);
+#ifdef _LP64
+      if (bytes == 8) {
+#else
+      if (bytes == 4) {
+#endif
+        VMReg baseReg = getVMRegFromLocation(baseLocation, _total_frame_size, CHECK_NULL);
+        map->set_derived_oop(vmReg, baseReg);
+      } else {
+        JVMCI_ERROR_NULL("invalid derived oop size in ReferenceMap: %d", bytes);
+      }
+#ifdef _LP64
     } else if (bytes == 8) {
       // wide oop
       map->set_oop(vmReg);
-    } else {
+    } else if (bytes == 4) {
       // narrow oop
-      assert(bytes == 4, "wrong size");
       map->set_narrowoop(vmReg);
+#else
+    } else if (bytes == 4) {
+      map->set_oop(vmReg);
+#endif
+    } else {
+      JVMCI_ERROR_NULL("invalid oop size in ReferenceMap: %d", bytes);
     }
   }
 
-  oop callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info);
-  if (callee_save_info != NULL) {
-    objArrayOop registers = RegisterSaveLayout::registers(callee_save_info);
-    typeArrayOop slots = RegisterSaveLayout::slots(callee_save_info);
+  Handle callee_save_info = (oop) DebugInfo::calleeSaveInfo(debug_info);
+  if (callee_save_info.not_null()) {
+    objArrayHandle registers = RegisterSaveLayout::registers(callee_save_info);
+    typeArrayHandle slots = RegisterSaveLayout::slots(callee_save_info);
     for (jint i = 0; i < slots->length(); i++) {
-      oop jvmci_reg = registers->obj_at(i);
+      Handle jvmci_reg = registers->obj_at(i);
       jint jvmci_reg_number = code_Register::number(jvmci_reg);
-      VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number);
+      VMReg hotspot_reg = CodeInstaller::get_hotspot_reg(jvmci_reg_number, CHECK_NULL);
       // HotSpot stack slots are 4 bytes
       jint jvmci_slot = slots->int_at(i);
       jint hotspot_slot = jvmci_slot * VMRegImpl::slots_per_word;
@@ -142,7 +177,7 @@
   return map;
 }
 
-Metadata* CodeInstaller::record_metadata_reference(Handle& constant) {
+Metadata* CodeInstaller::record_metadata_reference(Handle constant, TRAPS) {
   oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant);
   if (obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) {
     Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj));
@@ -157,16 +192,18 @@
     TRACE_jvmci_3("metadata[%d of %d] = %s", index, _oop_recorder->metadata_count(), method->name()->as_C_string());
     return method;
   } else {
-    fatal("unexpected metadata reference for constant of type %s", obj->klass()->name()->as_C_string());
-    return NULL;
+    JVMCI_ERROR_NULL("unexpected metadata reference for constant of type %s", obj->klass()->signature_name());
   }
 }
 
 #ifdef _LP64
-narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle& constant) {
+narrowKlass CodeInstaller::record_narrow_metadata_reference(Handle constant, TRAPS) {
   oop obj = HotSpotMetaspaceConstantImpl::metaspaceObject(constant);
   assert(HotSpotMetaspaceConstantImpl::compressed(constant), "unexpected uncompressed pointer");
-  assert(obj->is_a(HotSpotResolvedObjectTypeImpl::klass()), "unexpected compressed pointer of type %s", obj->klass()->name()->as_C_string());
+
+  if (!obj->is_a(HotSpotResolvedObjectTypeImpl::klass())) {
+    JVMCI_ERROR_0("unexpected compressed pointer of type %s", obj->klass()->signature_name());
+  }
 
   Klass* klass = java_lang_Class::as_Klass(HotSpotResolvedObjectTypeImpl::javaClass(obj));
   int index = _oop_recorder->find_index(klass);
@@ -175,9 +212,9 @@
 }
 #endif
 
-Location::Type CodeInstaller::get_oop_type(oop value) {
-  oop lirKind = Value::lirKind(value);
-  oop platformKind = LIRKind::platformKind(lirKind);
+Location::Type CodeInstaller::get_oop_type(Handle value) {
+  Handle lirKind = Value::lirKind(value);
+  Handle platformKind = LIRKind::platformKind(lirKind);
   assert(LIRKind::referenceMask(lirKind) == 1, "unexpected referenceMask");
 
   if (platformKind == word_kind()) {
@@ -187,24 +224,29 @@
   }
 }
 
-ScopeValue* CodeInstaller::get_scope_value(oop value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second) {
+ScopeValue* CodeInstaller::get_scope_value(Handle value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second, TRAPS) {
   second = NULL;
-  if (value == Value::ILLEGAL()) {
-    assert(type == T_ILLEGAL, "expected legal value");
+  if (value.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  } else if (value == Value::ILLEGAL()) {
+    if (type != T_ILLEGAL) {
+      JVMCI_ERROR_NULL("unexpected illegal value, expected %s", basictype_to_str(type));
+    }
     return _illegal_value;
   } else if (value->is_a(RegisterValue::klass())) {
-    oop reg = RegisterValue::reg(value);
+    Handle reg = RegisterValue::reg(value);
     jint number = code_Register::number(reg);
-    VMReg hotspotRegister = get_hotspot_reg(number);
+    VMReg hotspotRegister = get_hotspot_reg(number, CHECK_NULL);
     if (is_general_purpose_reg(hotspotRegister)) {
       Location::Type locationType;
       if (type == T_OBJECT) {
         locationType = get_oop_type(value);
       } else if (type == T_LONG) {
         locationType = Location::lng;
+      } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN) {
+        locationType = Location::int_in_long;
       } else {
-        assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in cpu register");
-        locationType = Location::int_in_long;
+        JVMCI_ERROR_NULL("unexpected type %s in cpu register", basictype_to_str(type));
       }
       ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister));
       if (type == T_LONG) {
@@ -212,13 +254,14 @@
       }
       return value;
     } else {
-      assert(type == T_FLOAT || type == T_DOUBLE, "only float and double expected in xmm register");
       Location::Type locationType;
       if (type == T_FLOAT) {
         // this seems weird, but the same value is used in c1_LinearScan
         locationType = Location::normal;
+      } else if (type == T_DOUBLE) {
+        locationType = Location::dbl;
       } else {
-        locationType = Location::dbl;
+        JVMCI_ERROR_NULL("unexpected type %s in floating point register", basictype_to_str(type));
       }
       ScopeValue* value = new LocationValue(Location::new_reg_loc(locationType, hotspotRegister));
       if (type == T_DOUBLE) {
@@ -239,9 +282,10 @@
       locationType = Location::lng;
     } else if (type == T_DOUBLE) {
       locationType = Location::dbl;
+    } else if (type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN) {
+      locationType = Location::normal;
     } else {
-      assert(type == T_INT || type == T_FLOAT || type == T_SHORT || type == T_CHAR || type == T_BYTE || type == T_BOOLEAN, "unexpected type in stack slot");
-      locationType = Location::normal;
+      JVMCI_ERROR_NULL("unexpected type %s in stack slot", basictype_to_str(type));
     }
     ScopeValue* value = new LocationValue(Location::new_stk_loc(locationType, offset));
     if (type == T_DOUBLE || type == T_LONG) {
@@ -254,7 +298,10 @@
         jlong prim = PrimitiveConstant::primitive(value);
         return new ConstantLongValue(prim);
       } else {
-        assert(type == JVMCIRuntime::kindToBasicType(JavaKind::typeChar(PrimitiveConstant::kind(value))), "primitive constant type doesn't match");
+        BasicType constantType = JVMCIRuntime::kindToBasicType(PrimitiveConstant::kind(value), CHECK_NULL);
+        if (type != constantType) {
+          JVMCI_ERROR_NULL("primitive constant type doesn't match, expected %s but got %s", basictype_to_str(type), basictype_to_str(constantType));
+        }
         if (type == T_INT || type == T_FLOAT) {
           jint prim = (jint)PrimitiveConstant::primitive(value);
           switch (prim) {
@@ -264,53 +311,63 @@
             case  2: return _int_2_scope_value;
             default: return new ConstantIntValue(prim);
           }
-        } else {
-          assert(type == T_LONG || type == T_DOUBLE, "unexpected primitive constant type");
+        } else if (type == T_LONG || type == T_DOUBLE) {
           jlong prim = PrimitiveConstant::primitive(value);
           second = _int_1_scope_value;
           return new ConstantLongValue(prim);
+        } else {
+          JVMCI_ERROR_NULL("unexpected primitive constant type %s", basictype_to_str(type));
         }
       }
-    } else {
-      assert(type == T_OBJECT, "unexpected object constant");
-      if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) {
+    } else if (value->is_a(NullConstant::klass()) || value->is_a(HotSpotCompressedNullConstant::klass())) {
+      if (type == T_OBJECT) {
         return _oop_null_scope_value;
       } else {
-        assert(value->is_a(HotSpotObjectConstantImpl::klass()), "unexpected constant type");
+        JVMCI_ERROR_NULL("unexpected null constant, expected %s", basictype_to_str(type));
+      }
+    } else if (value->is_a(HotSpotObjectConstantImpl::klass())) {
+      if (type == T_OBJECT) {
         oop obj = HotSpotObjectConstantImpl::object(value);
-        assert(obj != NULL, "null value must be in NullConstant");
+        if (obj == NULL) {
+          JVMCI_ERROR_NULL("null value must be in NullConstant");
+        }
         return new ConstantOopWriteValue(JNIHandles::make_local(obj));
+      } else {
+        JVMCI_ERROR_NULL("unexpected object constant, expected %s", basictype_to_str(type));
       }
     }
   } else if (value->is_a(VirtualObject::klass())) {
-    assert(type == T_OBJECT, "unexpected virtual object");
-    int id = VirtualObject::id(value);
-    ScopeValue* object = objects->at(id);
-    assert(object != NULL, "missing value");
-    return object;
-  } else {
-    value->klass()->print();
-    value->print();
+    if (type == T_OBJECT) {
+      int id = VirtualObject::id(value);
+      if (0 <= id && id < objects->length()) {
+        ScopeValue* object = objects->at(id);
+        if (object != NULL) {
+          return object;
+        }
+      }
+      JVMCI_ERROR_NULL("unknown virtual object id %d", id);
+    } else {
+      JVMCI_ERROR_NULL("unexpected virtual object, expected %s", basictype_to_str(type));
+    }
   }
-  ShouldNotReachHere();
-  return NULL;
+
+  JVMCI_ERROR_NULL("unexpected value in scope: %s", value->klass()->signature_name())
 }
 
-void CodeInstaller::record_object_value(ObjectValue* sv, oop value, GrowableArray<ScopeValue*>* objects) {
-  oop type = VirtualObject::type(value);
+void CodeInstaller::record_object_value(ObjectValue* sv, Handle value, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  Handle type = VirtualObject::type(value);
   int id = VirtualObject::id(value);
   oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type);
   Klass* klass = java_lang_Class::as_Klass(javaMirror);
   bool isLongArray = klass == Universe::longArrayKlassObj();
 
-  objArrayOop values = VirtualObject::values(value);
-  objArrayOop slotKinds = VirtualObject::slotKinds(value);
+  objArrayHandle values = VirtualObject::values(value);
+  objArrayHandle slotKinds = VirtualObject::slotKinds(value);
   for (jint i = 0; i < values->length(); i++) {
     ScopeValue* cur_second = NULL;
-    oop object = values->obj_at(i);
-    oop kind = slotKinds->obj_at(i);
-    BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-    ScopeValue* value = get_scope_value(object, type, objects, cur_second);
+    Handle object = values->obj_at(i);
+    BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+    ScopeValue* value = get_scope_value(object, type, objects, cur_second, CHECK);
 
     if (isLongArray && cur_second == NULL) {
       // we're trying to put ints into a long array... this isn't really valid, but it's used for some optimizations.
@@ -326,14 +383,19 @@
   }
 }
 
-MonitorValue* CodeInstaller::get_monitor_value(oop value, GrowableArray<ScopeValue*>* objects) {
-  guarantee(value->is_a(StackLockValue::klass()), "Monitors must be of type StackLockValue");
+MonitorValue* CodeInstaller::get_monitor_value(Handle value, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  if (value.is_null()) {
+    THROW_NULL(vmSymbols::java_lang_NullPointerException());
+  }
+  if (!value->is_a(StackLockValue::klass())) {
+    JVMCI_ERROR_NULL("Monitors must be of type StackLockValue, got %s", value->klass()->signature_name());
+  }
 
   ScopeValue* second = NULL;
-  ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second);
+  ScopeValue* owner_value = get_scope_value(StackLockValue::owner(value), T_OBJECT, objects, second, CHECK_NULL);
   assert(second == NULL, "monitor cannot occupy two stack slots");
 
-  ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second);
+  ScopeValue* lock_data_value = get_scope_value(StackLockValue::slot(value), T_LONG, objects, second, CHECK_NULL);
   assert(second == lock_data_value, "monitor is LONG value that occupies two stack slots");
   assert(lock_data_value->is_location(), "invalid monitor location");
   Location lock_data_loc = ((LocationValue*)lock_data_value)->location();
@@ -346,7 +408,7 @@
   return new MonitorValue(owner_value, lock_data_loc, eliminated);
 }
 
-void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder) {
+void CodeInstaller::initialize_dependencies(oop compiled_code, OopRecorder* recorder, TRAPS) {
   JavaThread* thread = JavaThread::current();
   CompilerThread* compilerThread = thread->is_Compiler_thread() ? thread->as_CompilerThread() : NULL;
   _oop_recorder = recorder;
@@ -368,8 +430,7 @@
         } else if (assumption->klass() == Assumptions_CallSiteTargetValue::klass()) {
           assumption_CallSiteTargetValue(assumption);
         } else {
-          assumption->print();
-          fatal("unexpected Assumption subclass");
+          JVMCI_ERROR("unexpected Assumption subclass %s", assumption->klass()->signature_name());
         }
       }
     }
@@ -414,18 +475,19 @@
   _size = bytes;
 }
 
-JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata) {
+JVMCIEnv::CodeInstallResult CodeInstaller::gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS) {
   CodeBuffer buffer("JVMCI Compiler CodeBuffer for Metadata");
   jobject compiled_code_obj = JNIHandles::make_local(compiled_code());
-  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL);
+  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), NULL, CHECK_OK);
 
   // Get instructions and constants CodeSections early because we need it.
   _instructions = buffer.insts();
   _constants = buffer.consts();
 
-  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj));
-  if (!initialize_buffer(buffer)) {
-    return JVMCIEnv::code_too_large;
+  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK);
+  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK);
+  if (result != JVMCIEnv::ok) {
+    return result;
   }
   process_exception_handlers();
 
@@ -446,18 +508,18 @@
 }
 
 // constructor used to create a method
-JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log) {
+JVMCIEnv::CodeInstallResult CodeInstaller::install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS) {
   CodeBuffer buffer("JVMCI Compiler CodeBuffer");
   jobject compiled_code_obj = JNIHandles::make_local(compiled_code());
   OopRecorder* recorder = new OopRecorder(&_arena, true);
-  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder);
+  initialize_dependencies(JNIHandles::resolve(compiled_code_obj), recorder, CHECK_OK);
 
   // Get instructions and constants CodeSections early because we need it.
   _instructions = buffer.insts();
   _constants = buffer.consts();
 
-  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj));
-  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer);
+  initialize_fields(target(), JNIHandles::resolve(compiled_code_obj), CHECK_OK);
+  JVMCIEnv::CodeInstallResult result = initialize_buffer(buffer, CHECK_OK);
   if (result != JVMCIEnv::ok) {
     return result;
   }
@@ -500,7 +562,7 @@
   return result;
 }
 
-void CodeInstaller::initialize_fields(oop target, oop compiled_code) {
+void CodeInstaller::initialize_fields(oop target, oop compiled_code, TRAPS) {
   if (compiled_code->is_a(HotSpotCompiledNmethod::klass())) {
     Handle hotspotJavaMethod = HotSpotCompiledNmethod::method(compiled_code);
     methodHandle method = getMethodFromHotSpotMethod(hotspotJavaMethod());
@@ -521,7 +583,9 @@
 
   // Pre-calculate the constants section size.  This is required for PC-relative addressing.
   _data_section_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSection(compiled_code));
-  guarantee(HotSpotCompiledCode::dataSectionAlignment(compiled_code) <= _constants->alignment(), "Alignment inside constants section is restricted by alignment of section begin");
+  if ((_constants->alignment() % HotSpotCompiledCode::dataSectionAlignment(compiled_code)) != 0) {
+    JVMCI_ERROR("invalid data section alignment: %d", HotSpotCompiledCode::dataSectionAlignment(compiled_code));
+  }
   _constants_size = data_section()->length();
 
   _data_section_patches_handle = JNIHandles::make_local(HotSpotCompiledCode::dataSectionPatches(compiled_code));
@@ -538,16 +602,18 @@
   _word_kind_handle = JNIHandles::make_local(Architecture::wordKind(arch));
 }
 
-int CodeInstaller::estimate_stubs_size() {
+int CodeInstaller::estimate_stubs_size(TRAPS) {
   // Estimate the number of static call stubs that might be emitted.
   int static_call_stubs = 0;
   objArrayOop sites = this->sites();
   for (int i = 0; i < sites->length(); i++) {
     oop site = sites->obj_at(i);
-    if (site->is_a(CompilationResult_Mark::klass())) {
+    if (site != NULL && site->is_a(CompilationResult_Mark::klass())) {
       oop id_obj = CompilationResult_Mark::id(site);
       if (id_obj != NULL) {
-        assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected");
+        if (!java_lang_boxing_object::is_instance(id_obj, T_INT)) {
+          JVMCI_ERROR_0("expected Integer id, got %s", id_obj->klass()->signature_name());
+        }
         jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT));
         if (id == INVOKESTATIC || id == INVOKESPECIAL) {
           static_call_stubs++;
@@ -559,7 +625,7 @@
 }
 
 // perform data and call relocation on the CodeBuffer
-JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer) {
+JVMCIEnv::CodeInstallResult CodeInstaller::initialize_buffer(CodeBuffer& buffer, TRAPS) {
   HandleMark hm;
   objArrayHandle sites = this->sites();
   int locs_buffer_size = sites->length() * (relocInfo::length_limit + sizeof(relocInfo));
@@ -568,7 +634,7 @@
   // stubs.  Stubs have extra relocs but they are managed by the stub
   // section itself so they don't need to be accounted for in the
   // locs_buffer above.
-  int stubs_size = estimate_stubs_size();
+  int stubs_size = estimate_stubs_size(CHECK_OK);
   int total_size = round_to(_code_size, buffer.insts()->alignment()) + round_to(_constants_size, buffer.consts()->alignment()) + round_to(stubs_size, buffer.stubs()->alignment());
 
   if (total_size > JVMCINMethodSizeLimit) {
@@ -600,19 +666,30 @@
 
   for (int i = 0; i < data_section_patches()->length(); i++) {
     Handle patch = data_section_patches()->obj_at(i);
+    if (patch.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
     Handle reference = CompilationResult_DataPatch::reference(patch);
-    assert(reference->is_a(CompilationResult_ConstantReference::klass()), "patch in data section must be a ConstantReference");
+    if (reference.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
+    if (!reference->is_a(CompilationResult_ConstantReference::klass())) {
+      JVMCI_ERROR_OK("invalid patch in data section: %s", reference->klass()->signature_name());
+    }
     Handle constant = CompilationResult_ConstantReference::constant(reference);
+    if (constant.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
     address dest = _constants->start() + CompilationResult_Site::pcOffset(patch);
     if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) {
       if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
-        *((narrowKlass*) dest) = record_narrow_metadata_reference(constant);
+        *((narrowKlass*) dest) = record_narrow_metadata_reference(constant, CHECK_OK);
 #else
-        fatal("unexpected compressed Klass* in 32-bit mode");
+        JVMCI_ERROR_OK("unexpected compressed Klass* in 32-bit mode");
 #endif
       } else {
-        *((Metadata**) dest) = record_metadata_reference(constant);
+        *((Metadata**) dest) = record_metadata_reference(constant, CHECK_OK);
       }
     } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
       Handle obj = HotSpotObjectConstantImpl::object(constant);
@@ -623,48 +700,49 @@
 #ifdef _LP64
         _constants->relocate(dest, oop_Relocation::spec(oop_index), relocInfo::narrow_oop_in_const);
 #else
-        fatal("unexpected compressed oop in 32-bit mode");
+        JVMCI_ERROR_OK("unexpected compressed oop in 32-bit mode");
 #endif
       } else {
         _constants->relocate(dest, oop_Relocation::spec(oop_index));
       }
     } else {
-      ShouldNotReachHere();
+      JVMCI_ERROR_OK("invalid constant in data section: %s", constant->klass()->signature_name());
     }
   }
   jint last_pc_offset = -1;
   for (int i = 0; i < sites->length(); i++) {
-    {
-        No_Safepoint_Verifier no_safepoint;
-        oop site = sites->obj_at(i);
-        jint pc_offset = CompilationResult_Site::pcOffset(site);
+    Handle site = sites->obj_at(i);
+    if (site.is_null()) {
+      THROW_(vmSymbols::java_lang_NullPointerException(), JVMCIEnv::ok);
+    }
+
+    jint pc_offset = CompilationResult_Site::pcOffset(site);
 
-        if (site->is_a(CompilationResult_Call::klass())) {
-          TRACE_jvmci_4("call at %i", pc_offset);
-          site_Call(buffer, pc_offset, site);
-        } else if (site->is_a(CompilationResult_Infopoint::klass())) {
-          // three reasons for infopoints denote actual safepoints
-          oop reason = CompilationResult_Infopoint::reason(site);
-          if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) {
-            TRACE_jvmci_4("safepoint at %i", pc_offset);
-            site_Safepoint(buffer, pc_offset, site);
-          } else {
-            // if the infopoint is not an actual safepoint, it must have one of the other reasons
-            // (safeguard against new safepoint types that require handling above)
-            assert(InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason, "");
-            site_Infopoint(buffer, pc_offset, site);
-          }
-        } else if (site->is_a(CompilationResult_DataPatch::klass())) {
-          TRACE_jvmci_4("datapatch at %i", pc_offset);
-          site_DataPatch(buffer, pc_offset, site);
-        } else if (site->is_a(CompilationResult_Mark::klass())) {
-          TRACE_jvmci_4("mark at %i", pc_offset);
-          site_Mark(buffer, pc_offset, site);
-        } else {
-          fatal("unexpected Site subclass");
-        }
-        last_pc_offset = pc_offset;
+    if (site->is_a(CompilationResult_Call::klass())) {
+      TRACE_jvmci_4("call at %i", pc_offset);
+      site_Call(buffer, pc_offset, site, CHECK_OK);
+    } else if (site->is_a(CompilationResult_Infopoint::klass())) {
+      // three reasons for infopoints denote actual safepoints
+      oop reason = CompilationResult_Infopoint::reason(site);
+      if (InfopointReason::SAFEPOINT() == reason || InfopointReason::CALL() == reason || InfopointReason::IMPLICIT_EXCEPTION() == reason) {
+        TRACE_jvmci_4("safepoint at %i", pc_offset);
+        site_Safepoint(buffer, pc_offset, site, CHECK_OK);
+      } else if (InfopointReason::METHOD_START() == reason || InfopointReason::METHOD_END() == reason || InfopointReason::LINE_NUMBER() == reason) {
+        site_Infopoint(buffer, pc_offset, site, CHECK_OK);
+      } else {
+        JVMCI_ERROR_OK("unknown infopoint reason at %i", pc_offset);
+      }
+    } else if (site->is_a(CompilationResult_DataPatch::klass())) {
+      TRACE_jvmci_4("datapatch at %i", pc_offset);
+      site_DataPatch(buffer, pc_offset, site, CHECK_OK);
+    } else if (site->is_a(CompilationResult_Mark::klass())) {
+      TRACE_jvmci_4("mark at %i", pc_offset);
+      site_Mark(buffer, pc_offset, site, CHECK_OK);
+    } else {
+      JVMCI_ERROR_OK("unexpected site subclass: %s", site->klass()->signature_name());
     }
+    last_pc_offset = pc_offset;
+
     if (CodeInstallSafepointChecks && SafepointSynchronize::do_call_back()) {
       // this is a hacky way to force a safepoint check but nothing else was jumping out at me.
       ThreadToNativeFromVM ttnfv(JavaThread::current());
@@ -673,7 +751,6 @@
 
 #ifndef PRODUCT
   if (comments() != NULL) {
-    No_Safepoint_Verifier no_safepoint;
     for (int i = 0; i < comments()->length(); i++) {
       oop comment = comments()->obj_at(i);
       assert(comment->is_a(HotSpotCompiledCode_Comment::klass()), "cce");
@@ -759,56 +836,61 @@
   return true;
 }
 
-GrowableArray<ScopeValue*>* CodeInstaller::record_virtual_objects(oop debug_info) {
-  objArrayOop virtualObjects = DebugInfo::virtualObjectMapping(debug_info);
-  if (virtualObjects == NULL) {
+GrowableArray<ScopeValue*>* CodeInstaller::record_virtual_objects(Handle debug_info, TRAPS) {
+  objArrayHandle virtualObjects = DebugInfo::virtualObjectMapping(debug_info);
+  if (virtualObjects.is_null()) {
     return NULL;
   }
   GrowableArray<ScopeValue*>* objects = new GrowableArray<ScopeValue*>(virtualObjects->length(), virtualObjects->length(), NULL);
   // Create the unique ObjectValues
   for (int i = 0; i < virtualObjects->length(); i++) {
-    oop value = virtualObjects->obj_at(i);
+    Handle value = virtualObjects->obj_at(i);
     int id = VirtualObject::id(value);
-    oop type = VirtualObject::type(value);
+    Handle type = VirtualObject::type(value);
     oop javaMirror = HotSpotResolvedObjectTypeImpl::javaClass(type);
     ObjectValue* sv = new ObjectValue(id, new ConstantOopWriteValue(JNIHandles::make_local(Thread::current(), javaMirror)));
-    assert(objects->at(id) == NULL, "once");
+    if (id < 0 || id >= objects->length()) {
+      JVMCI_ERROR_NULL("virtual object id %d out of bounds", id);
+    }
+    if (objects->at(id) != NULL) {
+      JVMCI_ERROR_NULL("duplicate virtual object id %d", id);
+    }
     objects->at_put(id, sv);
   }
   // All the values which could be referenced by the VirtualObjects
   // exist, so now describe all the VirtualObjects themselves.
   for (int i = 0; i < virtualObjects->length(); i++) {
-    oop value = virtualObjects->obj_at(i);
+    Handle value = virtualObjects->obj_at(i);
     int id = VirtualObject::id(value);
-    record_object_value(objects->at(id)->as_ObjectValue(), value, objects);
+    record_object_value(objects->at(id)->as_ObjectValue(), value, objects, CHECK_NULL);
   }
   _debug_recorder->dump_object_pool(objects);
   return objects;
 }
 
-void CodeInstaller::record_scope(jint pc_offset, oop debug_info) {
-  oop position = DebugInfo::bytecodePosition(debug_info);
-  if (position == NULL) {
+void CodeInstaller::record_scope(jint pc_offset, Handle debug_info, TRAPS) {
+  Handle position = DebugInfo::bytecodePosition(debug_info);
+  if (position.is_null()) {
     // Stubs do not record scope info, just oop maps
     return;
   }
 
-  GrowableArray<ScopeValue*>* objectMapping = record_virtual_objects(debug_info);
-  record_scope(pc_offset, position, objectMapping);
+  GrowableArray<ScopeValue*>* objectMapping = record_virtual_objects(debug_info, CHECK);
+  record_scope(pc_offset, position, objectMapping, CHECK);
 }
 
-void CodeInstaller::record_scope(jint pc_offset, oop position, GrowableArray<ScopeValue*>* objects) {
-  oop frame = NULL;
+void CodeInstaller::record_scope(jint pc_offset, Handle position, GrowableArray<ScopeValue*>* objects, TRAPS) {
+  Handle frame;
   if (position->is_a(BytecodeFrame::klass())) {
     frame = position;
   }
-  oop caller_frame = BytecodePosition::caller(position);
-  if (caller_frame != NULL) {
-    record_scope(pc_offset, caller_frame, objects);
+  Handle caller_frame = BytecodePosition::caller(position);
+  if (caller_frame.not_null()) {
+    record_scope(pc_offset, caller_frame, objects, CHECK);
   }
 
-  oop hotspot_method = BytecodePosition::method(position);
-  Method* method = getMethodFromHotSpotMethod(hotspot_method);
+  Handle hotspot_method = BytecodePosition::method(position);
+  Method* method = getMethodFromHotSpotMethod(hotspot_method());
   jint bci = BytecodePosition::bci(position);
   if (bci == BytecodeFrame::BEFORE_BCI()) {
     bci = SynchronizationEntryBCI;
@@ -817,13 +899,13 @@
   TRACE_jvmci_2("Recording scope pc_offset=%d bci=%d method=%s", pc_offset, bci, method->name_and_sig_as_C_string());
 
   bool reexecute = false;
-  if (frame != NULL) {
+  if (frame.not_null()) {
     if (bci == SynchronizationEntryBCI){
        reexecute = false;
     } else {
       Bytecodes::Code code = Bytecodes::java_code_at(method, method->bcp_from(bci));
       reexecute = bytecode_should_reexecute(code);
-      if (frame != NULL) {
+      if (frame.not_null()) {
         reexecute = (BytecodeFrame::duringCall(frame) == JNI_FALSE);
       }
     }
@@ -834,15 +916,22 @@
   DebugToken* monitors_token = NULL;
   bool throw_exception = false;
 
-  if (frame != NULL) {
+  if (frame.not_null()) {
     jint local_count = BytecodeFrame::numLocals(frame);
     jint expression_count = BytecodeFrame::numStack(frame);
     jint monitor_count = BytecodeFrame::numLocks(frame);
-    objArrayOop values = BytecodeFrame::values(frame);
-    objArrayOop slotKinds = BytecodeFrame::slotKinds(frame);
+    objArrayHandle values = BytecodeFrame::values(frame);
+    objArrayHandle slotKinds = BytecodeFrame::slotKinds(frame);
 
-    assert(local_count + expression_count + monitor_count == values->length(), "unexpected values length");
-    assert(local_count + expression_count == slotKinds->length(), "unexpected slotKinds length");
+    if (values.is_null() || slotKinds.is_null()) {
+      THROW(vmSymbols::java_lang_NullPointerException());
+    }
+    if (local_count + expression_count + monitor_count != values->length()) {
+      JVMCI_ERROR("unexpected values length %d in scope (%d locals, %d expressions, %d monitors)", values->length(), local_count, expression_count, monitor_count);
+    }
+    if (local_count + expression_count != slotKinds->length()) {
+      JVMCI_ERROR("unexpected slotKinds length %d in scope (%d locals, %d expressions)", slotKinds->length(), local_count, expression_count);
+    }
 
     GrowableArray<ScopeValue*>* locals = local_count > 0 ? new GrowableArray<ScopeValue*> (local_count) : NULL;
     GrowableArray<ScopeValue*>* expressions = expression_count > 0 ? new GrowableArray<ScopeValue*> (expression_count) : NULL;
@@ -853,30 +942,30 @@
 
     for (jint i = 0; i < values->length(); i++) {
       ScopeValue* second = NULL;
-      oop value = values->obj_at(i);
+      Handle value = values->obj_at(i);
       if (i < local_count) {
-        oop kind = slotKinds->obj_at(i);
-        BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-        ScopeValue* first = get_scope_value(value, type, objects, second);
+        BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+        ScopeValue* first = get_scope_value(value, type, objects, second, CHECK);
         if (second != NULL) {
           locals->append(second);
         }
         locals->append(first);
       } else if (i < local_count + expression_count) {
-        oop kind = slotKinds->obj_at(i);
-        BasicType type = JVMCIRuntime::kindToBasicType(JavaKind::typeChar(kind));
-        ScopeValue* first = get_scope_value(value, type, objects, second);
+        BasicType type = JVMCIRuntime::kindToBasicType(slotKinds->obj_at(i), CHECK);
+        ScopeValue* first = get_scope_value(value, type, objects, second, CHECK);
         if (second != NULL) {
           expressions->append(second);
         }
         expressions->append(first);
       } else {
-        monitors->append(get_monitor_value(value, objects));
+        MonitorValue *monitor = get_monitor_value(value, objects, CHECK);
+        monitors->append(monitor);
       }
       if (second != NULL) {
         i++;
-        assert(i < values->length(), "double-slot value not followed by Value.ILLEGAL");
-        assert(values->obj_at(i) == Value::ILLEGAL(), "double-slot value not followed by Value.ILLEGAL");
+        if (i >= values->length() || values->obj_at(i) != Value::ILLEGAL()) {
+          JVMCI_ERROR("double-slot value not followed by Value.ILLEGAL");
+        }
       }
     }
 
@@ -891,32 +980,37 @@
                                   locals_token, expressions_token, monitors_token);
 }
 
-void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop debug_info = CompilationResult_Infopoint::debugInfo(site);
-  assert(debug_info != NULL, "debug info expected");
+void CodeInstaller::site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle debug_info = CompilationResult_Infopoint::debugInfo(site);
+  if (debug_info.is_null()) {
+    JVMCI_ERROR("debug info expected at safepoint at %i", pc_offset);
+  }
 
   // address instruction = _instructions->start() + pc_offset;
   // jint next_pc_offset = Assembler::locate_next_instruction(instruction) - _instructions->start();
-  _debug_recorder->add_safepoint(pc_offset, create_oop_map(debug_info));
-  record_scope(pc_offset, debug_info);
+  OopMap *map = create_oop_map(debug_info, CHECK);
+  _debug_recorder->add_safepoint(pc_offset, map);
+  record_scope(pc_offset, debug_info, CHECK);
   _debug_recorder->end_safepoint(pc_offset);
 }
 
-void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop debug_info = CompilationResult_Infopoint::debugInfo(site);
-  assert(debug_info != NULL, "debug info expected");
+void CodeInstaller::site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle debug_info = CompilationResult_Infopoint::debugInfo(site);
+  if (debug_info.is_null()) {
+    JVMCI_ERROR("debug info expected at infopoint at %i", pc_offset);
+  }
 
   _debug_recorder->add_non_safepoint(pc_offset);
-  record_scope(pc_offset, debug_info);
+  record_scope(pc_offset, debug_info, CHECK);
   _debug_recorder->end_non_safepoint(pc_offset);
 }
 
-void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop target = CompilationResult_Call::target(site);
+void CodeInstaller::site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle target = CompilationResult_Call::target(site);
   InstanceKlass* target_klass = InstanceKlass::cast(target->klass());
 
-  oop hotspot_method = NULL; // JavaMethod
-  oop foreign_call = NULL;
+  Handle hotspot_method; // JavaMethod
+  Handle foreign_call;
 
   if (target_klass->is_subclass_of(SystemDictionary::HotSpotForeignCallTarget_klass())) {
     foreign_call = target;
@@ -924,27 +1018,29 @@
     hotspot_method = target;
   }
 
-  oop debug_info = CompilationResult_Call::debugInfo(site);
+  Handle debug_info = CompilationResult_Call::debugInfo(site);
 
-  assert(!!hotspot_method ^ !!foreign_call, "Call site needs exactly one type");
+  assert(hotspot_method.not_null() ^ foreign_call.not_null(), "Call site needs exactly one type");
 
   NativeInstruction* inst = nativeInstruction_at(_instructions->start() + pc_offset);
-  jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method);
+  jint next_pc_offset = CodeInstaller::pd_next_offset(inst, pc_offset, hotspot_method, CHECK);
 
-  if (debug_info != NULL) {
-    _debug_recorder->add_safepoint(next_pc_offset, create_oop_map(debug_info));
-    record_scope(next_pc_offset, debug_info);
+  if (debug_info.not_null()) {
+    OopMap *map = create_oop_map(debug_info, CHECK);
+    _debug_recorder->add_safepoint(next_pc_offset, map);
+    record_scope(next_pc_offset, debug_info, CHECK);
   }
 
-  if (foreign_call != NULL) {
+  if (foreign_call.not_null()) {
     jlong foreign_call_destination = HotSpotForeignCallTarget::address(foreign_call);
-    CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination);
+    CodeInstaller::pd_relocate_ForeignCall(inst, foreign_call_destination, CHECK);
   } else { // method != NULL
-    assert(hotspot_method != NULL, "unexpected JavaMethod");
-    assert(debug_info != NULL, "debug info expected");
+    if (debug_info.is_null()) {
+      JVMCI_ERROR("debug info expected at call at %i", pc_offset);
+    }
 
     TRACE_jvmci_3("method call");
-    CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset);
+    CodeInstaller::pd_relocate_JavaMethod(hotspot_method, pc_offset, CHECK);
     if (_next_call_type == INVOKESTATIC || _next_call_type == INVOKESPECIAL) {
       // Need a static call stub for transitions from compiled to interpreted.
       CompiledStaticCall::emit_to_interp_stub(buffer, _instructions->start() + pc_offset);
@@ -953,38 +1049,45 @@
 
   _next_call_type = INVOKE_INVALID;
 
-  if (debug_info != NULL) {
+  if (debug_info.not_null()) {
     _debug_recorder->end_safepoint(next_pc_offset);
   }
 }
 
-void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop reference = CompilationResult_DataPatch::reference(site);
-  if (reference->is_a(CompilationResult_ConstantReference::klass())) {
+void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle reference = CompilationResult_DataPatch::reference(site);
+  if (reference.is_null()) {
+    THROW(vmSymbols::java_lang_NullPointerException());
+  } else if (reference->is_a(CompilationResult_ConstantReference::klass())) {
     Handle constant = CompilationResult_ConstantReference::constant(reference);
-    if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
-      pd_patch_OopConstant(pc_offset, constant);
+    if (constant.is_null()) {
+      THROW(vmSymbols::java_lang_NullPointerException());
+    } else if (constant->is_a(HotSpotObjectConstantImpl::klass())) {
+      pd_patch_OopConstant(pc_offset, constant, CHECK);
     } else if (constant->is_a(HotSpotMetaspaceConstantImpl::klass())) {
-      pd_patch_MetaspaceConstant(pc_offset, constant);
-    } else if (constant->is_a(HotSpotSentinelConstant::klass())) {
-      fatal("sentinel constant unsupported");
+      pd_patch_MetaspaceConstant(pc_offset, constant, CHECK);
     } else {
-      fatal("unknown constant type in data patch");
+      JVMCI_ERROR("unknown constant type in data patch: %s", constant->klass()->signature_name());
     }
   } else if (reference->is_a(CompilationResult_DataSectionReference::klass())) {
     int data_offset = CompilationResult_DataSectionReference::offset(reference);
-    assert(0 <= data_offset && data_offset < _constants_size, "data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size);
-    pd_patch_DataSectionReference(pc_offset, data_offset);
+    if (0 <= data_offset && data_offset < _constants_size) {
+      pd_patch_DataSectionReference(pc_offset, data_offset);
+    } else {
+      JVMCI_ERROR("data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size);
+    }
   } else {
-    fatal("unknown data patch type");
+    JVMCI_ERROR("unknown data patch type: %s", reference->klass()->signature_name());
   }
 }
 
-void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, oop site) {
-  oop id_obj = CompilationResult_Mark::id(site);
+void CodeInstaller::site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS) {
+  Handle id_obj = CompilationResult_Mark::id(site);
 
-  if (id_obj != NULL) {
-    assert(java_lang_boxing_object::is_instance(id_obj, T_INT), "Integer id expected");
+  if (id_obj.not_null()) {
+    if (!java_lang_boxing_object::is_instance(id_obj(), T_INT)) {
+      JVMCI_ERROR("expected Integer id, got %s", id_obj->klass()->signature_name());
+    }
     jint id = id_obj->int_field(java_lang_boxing_object::value_offset_in_bytes(T_INT));
 
     address pc = _instructions->start() + pc_offset;
@@ -1017,7 +1120,7 @@
       case POLL_FAR:
       case POLL_RETURN_NEAR:
       case POLL_RETURN_FAR:
-        pd_relocate_poll(pc, id);
+        pd_relocate_poll(pc, id, CHECK);
         break;
       case CARD_TABLE_SHIFT:
       case CARD_TABLE_ADDRESS:
@@ -1027,7 +1130,7 @@
       case CRC_TABLE_ADDRESS:
         break;
       default:
-        ShouldNotReachHere();
+        JVMCI_ERROR("invalid mark id: %d", id);
         break;
     }
   }
--- a/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciCodeInstaller.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -154,13 +154,13 @@
   static ConstantIntValue*    _int_2_scope_value;
   static LocationValue*       _illegal_value;
 
-  jint pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method);
-  void pd_patch_OopConstant(int pc_offset, Handle& constant);
-  void pd_patch_MetaspaceConstant(int pc_offset, Handle& constant);
+  jint pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS);
+  void pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS);
+  void pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS);
   void pd_patch_DataSectionReference(int pc_offset, int data_offset);
-  void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination);
-  void pd_relocate_JavaMethod(oop method, jint pc_offset);
-  void pd_relocate_poll(address pc, jint mark);
+  void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS);
+  void pd_relocate_JavaMethod(Handle method, jint pc_offset, TRAPS);
+  void pd_relocate_poll(address pc, jint mark, TRAPS);
 
   objArrayOop sites() { return (objArrayOop) JNIHandles::resolve(_sites_handle); }
   arrayOop code() { return (arrayOop) JNIHandles::resolve(_code_handle); }
@@ -177,33 +177,33 @@
 
   CodeInstaller() : _arena(mtCompiler) {}
 
-  JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle& compiled_code, CodeMetadata& metadata);
-  JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log);
+  JVMCIEnv::CodeInstallResult gather_metadata(Handle target, Handle compiled_code, CodeMetadata& metadata, TRAPS);
+  JVMCIEnv::CodeInstallResult install(JVMCICompiler* compiler, Handle target, Handle compiled_code, CodeBlob*& cb, Handle installed_code, Handle speculation_log, TRAPS);
 
   static address runtime_call_target_address(oop runtime_call);
-  static VMReg get_hotspot_reg(jint jvmciRegisterNumber);
+  static VMReg get_hotspot_reg(jint jvmciRegisterNumber, TRAPS);
   static bool is_general_purpose_reg(VMReg hotspotRegister);
 
   const OopMapSet* oopMapSet() const { return _debug_recorder->_oopmaps; }
 
 protected:
-  Location::Type get_oop_type(oop value);
-  ScopeValue* get_scope_value(oop value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second);
-  MonitorValue* get_monitor_value(oop value, GrowableArray<ScopeValue*>* objects);
+  Location::Type get_oop_type(Handle value);
+  ScopeValue* get_scope_value(Handle value, BasicType type, GrowableArray<ScopeValue*>* objects, ScopeValue* &second, TRAPS);
+  MonitorValue* get_monitor_value(Handle value, GrowableArray<ScopeValue*>* objects, TRAPS);
 
-  Metadata* record_metadata_reference(Handle& constant);
+  Metadata* record_metadata_reference(Handle constant, TRAPS);
 #ifdef _LP64
-  narrowKlass record_narrow_metadata_reference(Handle& constant);
+  narrowKlass record_narrow_metadata_reference(Handle constant, TRAPS);
 #endif
 
   // extract the fields of the CompilationResult
-  void initialize_fields(oop target, oop target_method);
-  void initialize_dependencies(oop target_method, OopRecorder* oop_recorder);
+  void initialize_fields(oop target, oop target_method, TRAPS);
+  void initialize_dependencies(oop target_method, OopRecorder* oop_recorder, TRAPS);
 
-  int estimate_stubs_size();
+  int estimate_stubs_size(TRAPS);
 
   // perform data and call relocation on the CodeBuffer
-  JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer);
+  JVMCIEnv::CodeInstallResult initialize_buffer(CodeBuffer& buffer, TRAPS);
 
   void assumption_NoFinalizableSubclass(Handle assumption);
   void assumption_ConcreteSubtype(Handle assumption);
@@ -211,19 +211,19 @@
   void assumption_ConcreteMethod(Handle assumption);
   void assumption_CallSiteTargetValue(Handle assumption);
 
-  void site_Safepoint(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Infopoint(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Call(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_DataPatch(CodeBuffer& buffer, jint pc_offset, oop site);
-  void site_Mark(CodeBuffer& buffer, jint pc_offset, oop site);
+  void site_Safepoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Infopoint(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Call(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
+  void site_Mark(CodeBuffer& buffer, jint pc_offset, Handle site, TRAPS);
 
-  OopMap* create_oop_map(oop debug_info);
+  OopMap* create_oop_map(Handle debug_info, TRAPS);
 
-  void record_scope(jint pc_offset, oop debug_info);
-  void record_scope(jint pc_offset, oop code_pos, GrowableArray<ScopeValue*>* objects);
-  void record_object_value(ObjectValue* sv, oop value, GrowableArray<ScopeValue*>* objects);
+  void record_scope(jint pc_offset, Handle debug_info, TRAPS);
+  void record_scope(jint pc_offset, Handle code_pos, GrowableArray<ScopeValue*>* objects, TRAPS);
+  void record_object_value(ObjectValue* sv, Handle value, GrowableArray<ScopeValue*>* objects, TRAPS);
 
-  GrowableArray<ScopeValue*>* record_virtual_objects(oop debug_info);
+  GrowableArray<ScopeValue*>* record_virtual_objects(Handle debug_info, TRAPS);
 
   void process_exception_handlers();
   int estimateStubSpace(int static_call_stubs);
--- a/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -112,7 +112,7 @@
   _bootstrapping = false;
 }
 
-void JVMCICompiler::compile_method(methodHandle method, int entry_bci, JVMCIEnv* env) {
+void JVMCICompiler::compile_method(const methodHandle& method, int entry_bci, JVMCIEnv* env) {
   JVMCI_EXCEPTION_CONTEXT
 
   bool is_osr = entry_bci != InvocationEntryBci;
--- a/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -71,7 +71,7 @@
   // Compilation entry point for methods
   virtual void compile_method(ciEnv* env, ciMethod* target, int entry_bci, DirectiveSet* directive);
 
-  void compile_method(methodHandle target, int entry_bci, JVMCIEnv* env);
+  void compile_method(const methodHandle& target, int entry_bci, JVMCIEnv* env);
 
   virtual bool is_trivial(Method* method);
 
--- a/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciCompilerToVM.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -670,7 +670,7 @@
 
   TraceTime install_time("installCode", JVMCICompiler::codeInstallTimer());
   CodeInstaller installer;
-  JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle);
+  JVMCIEnv::CodeInstallResult result = installer.install(compiler, target_handle, compiled_code_handle, cb, installed_code_handle, speculation_log_handle, CHECK_0);
 
   if (PrintCodeCacheOnCompilation) {
     stringStream s;
@@ -690,6 +690,7 @@
       assert(installed_code_handle->is_a(InstalledCode::klass()), "wrong type");
       CompilerToVM::invalidate_installed_code(installed_code_handle, CHECK_0);
       InstalledCode::set_address(installed_code_handle, (jlong) cb);
+      InstalledCode::set_version(installed_code_handle, InstalledCode::version(installed_code_handle) + 1);
       if (cb->is_nmethod()) {
         InstalledCode::set_entryPoint(installed_code_handle, (jlong) cb->as_nmethod_or_null()->verified_entry_point());
       } else {
@@ -726,7 +727,7 @@
   CodeBlob *cb = NULL;
   CodeInstaller installer;
 
-  JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer);
+  JVMCIEnv::CodeInstallResult result = installer.gather_metadata(target_handle, compiled_code_handle, code_metadata, CHECK_0); //cb, pc_descs, nr_pc_descs, scopes_descs, scopes_size, reloc_buffer);
   if (result != JVMCIEnv::ok) {
     return result;
   }
--- a/hotspot/src/share/vm/jvmci/jvmciEnv.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciEnv.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -161,7 +161,7 @@
 }
 
 // ------------------------------------------------------------------
-KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle& accessing_klass,
+KlassHandle JVMCIEnv::get_klass_by_name(KlassHandle accessing_klass,
                                   Symbol* klass_name,
                                   bool require_local) {
   ResourceMark rm;
@@ -177,7 +177,7 @@
 KlassHandle JVMCIEnv::get_klass_by_index_impl(const constantPoolHandle& cpool,
                                         int index,
                                         bool& is_accessible,
-                                        KlassHandle& accessor) {
+                                        KlassHandle accessor) {
   JVMCI_EXCEPTION_CONTEXT;
   KlassHandle klass (THREAD, ConstantPool::klass_at_if_loaded(cpool, index));
   Symbol* klass_name = NULL;
@@ -218,7 +218,7 @@
 KlassHandle JVMCIEnv::get_klass_by_index(const constantPoolHandle& cpool,
                                    int index,
                                    bool& is_accessible,
-                                   KlassHandle& accessor) {
+                                   KlassHandle accessor) {
   ResourceMark rm;
   KlassHandle result = get_klass_by_index_impl(cpool, index, is_accessible, accessor);
   return result;
@@ -229,7 +229,7 @@
 //
 // Implementation note: the results of field lookups are cached
 // in the accessor klass.
-void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle& klass, fieldDescriptor& field_desc,
+void JVMCIEnv::get_field_by_index_impl(instanceKlassHandle klass, fieldDescriptor& field_desc,
                                         int index) {
   JVMCI_EXCEPTION_CONTEXT;
 
@@ -270,7 +270,7 @@
 
 // ------------------------------------------------------------------
 // Get a field by index from a klass's constant pool.
-void JVMCIEnv::get_field_by_index(instanceKlassHandle& accessor, fieldDescriptor& fd, int index) {
+void JVMCIEnv::get_field_by_index(instanceKlassHandle accessor, fieldDescriptor& fd, int index) {
   ResourceMark rm;
   return get_field_by_index_impl(accessor, fd, index);
 }
@@ -278,8 +278,8 @@
 // ------------------------------------------------------------------
 // Perform an appropriate method lookup based on accessor, holder,
 // name, signature, and bytecode.
-methodHandle JVMCIEnv::lookup_method(instanceKlassHandle& h_accessor,
-                               instanceKlassHandle& h_holder,
+methodHandle JVMCIEnv::lookup_method(instanceKlassHandle h_accessor,
+                               instanceKlassHandle h_holder,
                                Symbol*       name,
                                Symbol*       sig,
                                Bytecodes::Code bc) {
@@ -314,7 +314,7 @@
 // ------------------------------------------------------------------
 methodHandle JVMCIEnv::get_method_by_index_impl(const constantPoolHandle& cpool,
                                           int index, Bytecodes::Code bc,
-                                          instanceKlassHandle& accessor) {
+                                          instanceKlassHandle accessor) {
   if (bc == Bytecodes::_invokedynamic) {
     ConstantPoolCacheEntry* cpce = cpool->invokedynamic_cp_cache_entry_at(index);
     bool is_resolved = !cpce->is_f1_null();
@@ -379,7 +379,7 @@
 }
 
 // ------------------------------------------------------------------
-instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle& method_holder) {
+instanceKlassHandle JVMCIEnv::get_instance_klass_for_declared_method_holder(KlassHandle method_holder) {
   // For the case of <array>.clone(), the method holder can be an ArrayKlass*
   // instead of an InstanceKlass*.  For that case simply pretend that the
   // declared holder is Object.clone since that's where the call will bottom out.
@@ -397,7 +397,7 @@
 // ------------------------------------------------------------------
 methodHandle JVMCIEnv::get_method_by_index(const constantPoolHandle& cpool,
                                      int index, Bytecodes::Code bc,
-                                     instanceKlassHandle& accessor) {
+                                     instanceKlassHandle accessor) {
   ResourceMark rm;
   return get_method_by_index_impl(cpool, index, bc, accessor);
 }
@@ -452,7 +452,7 @@
 
 // ------------------------------------------------------------------
 JVMCIEnv::CodeInstallResult JVMCIEnv::register_method(
-                                methodHandle& method,
+                                const methodHandle& method,
                                 nmethod*& nm,
                                 int entry_bci,
                                 CodeOffsets* offsets,
--- a/hotspot/src/share/vm/jvmci/jvmciEnv.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciEnv.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -78,7 +78,7 @@
   // The CI treats a klass as loaded if it is consistently defined in
   // another loader, even if it hasn't yet been loaded in all loaders
   // that could potentially see it via delegation.
-  static KlassHandle get_klass_by_name(KlassHandle& accessing_klass,
+  static KlassHandle get_klass_by_name(KlassHandle accessing_klass,
                              Symbol* klass_name,
                              bool require_local);
 
@@ -86,12 +86,12 @@
   static KlassHandle   get_klass_by_index(const constantPoolHandle& cpool,
                                 int klass_index,
                                 bool& is_accessible,
-                                KlassHandle& loading_klass);
-  static void   get_field_by_index(instanceKlassHandle& loading_klass, fieldDescriptor& fd,
+                                KlassHandle loading_klass);
+  static void   get_field_by_index(instanceKlassHandle loading_klass, fieldDescriptor& fd,
                                 int field_index);
   static methodHandle  get_method_by_index(const constantPoolHandle& cpool,
                                  int method_index, Bytecodes::Code bc,
-                                 instanceKlassHandle& loading_klass);
+                                 instanceKlassHandle loading_klass);
 
   JVMCIEnv(CompileTask* task, int system_dictionary_modification_counter);
 
@@ -112,17 +112,17 @@
   static KlassHandle   get_klass_by_index_impl(const constantPoolHandle& cpool,
                                      int klass_index,
                                      bool& is_accessible,
-                                     KlassHandle& loading_klass);
-  static void   get_field_by_index_impl(instanceKlassHandle& loading_klass, fieldDescriptor& fd,
+                                     KlassHandle loading_klass);
+  static void   get_field_by_index_impl(instanceKlassHandle loading_klass, fieldDescriptor& fd,
                                      int field_index);
   static methodHandle  get_method_by_index_impl(const constantPoolHandle& cpool,
                                       int method_index, Bytecodes::Code bc,
-                                      instanceKlassHandle& loading_klass);
+                                      instanceKlassHandle loading_klass);
 
   // Helper methods
   static bool       check_klass_accessibility(KlassHandle accessing_klass, KlassHandle resolved_klass);
-  static methodHandle  lookup_method(instanceKlassHandle&  accessor,
-                           instanceKlassHandle&  holder,
+  static methodHandle  lookup_method(instanceKlassHandle  accessor,
+                           instanceKlassHandle  holder,
                            Symbol*         name,
                            Symbol*         sig,
                            Bytecodes::Code bc);
@@ -142,7 +142,7 @@
 
   // Register the result of a compilation.
   static JVMCIEnv::CodeInstallResult register_method(
-                       methodHandle&             target,
+                       const methodHandle&       target,
                        nmethod*&                 nm,
                        int                       entry_bci,
                        CodeOffsets*              offsets,
@@ -166,7 +166,7 @@
   // InstanceKlass*.  This is needed since the holder of a method in
   // the bytecodes could be an array type.  Basically this converts
   // array types into java/lang/Object and other types stay as they are.
-  static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle& klass);
+  static instanceKlassHandle get_instance_klass_for_declared_method_holder(KlassHandle klass);
 };
 
 #endif // SHARE_VM_JVMCI_JVMCIENV_HPP
--- a/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciJavaClasses.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -315,10 +315,10 @@
 #define FIELD(name, type, accessor, cast)                                                                                                                         \
     static int _##name##_offset;                                                                                                                                  \
     static type name(oop obj)                   { check(obj, #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                               \
-    static type name(Handle& obj)                { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                            \
+    static type name(Handle obj)                { check(obj(), #name, _##name##_offset); return cast obj->accessor(_##name##_offset); }                                             \
     static type name(jobject obj)               { check(JNIHandles::resolve(obj), #name, _##name##_offset); return cast JNIHandles::resolve(obj)->accessor(_##name##_offset); }     \
     static void set_##name(oop obj, type x)     { check(obj, #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                                  \
-    static void set_##name(Handle& obj, type x)  { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                               \
+    static void set_##name(Handle obj, type x)  { check(obj(), #name, _##name##_offset); obj->accessor##_put(_##name##_offset, x); }                                                \
     static void set_##name(jobject obj, type x) { check(JNIHandles::resolve(obj), #name, _##name##_offset); JNIHandles::resolve(obj)->accessor##_put(_##name##_offset, x); }
 
 #define EMPTY_CAST
--- a/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -59,7 +59,11 @@
 static const char* OPTION_PREFIX = "jvmci.option.";
 static const size_t OPTION_PREFIX_LEN = strlen(OPTION_PREFIX);
 
-BasicType JVMCIRuntime::kindToBasicType(jchar ch) {
+BasicType JVMCIRuntime::kindToBasicType(Handle kind, TRAPS) {
+  if (kind.is_null()) {
+    THROW_(vmSymbols::java_lang_NullPointerException(), T_ILLEGAL);
+  }
+  jchar ch = JavaKind::typeChar(kind);
   switch(ch) {
     case 'z': return T_BOOLEAN;
     case 'b': return T_BYTE;
@@ -72,10 +76,8 @@
     case 'a': return T_OBJECT;
     case '-': return T_ILLEGAL;
     default:
-      fatal("unexpected Kind: %c", ch);
-      break;
+      JVMCI_ERROR_(T_ILLEGAL, "unexpected Kind: %c", ch);
   }
-  return T_ILLEGAL;
 }
 
 // Simple helper to see if the caller of a runtime stub which
--- a/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/jvmciRuntime.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -29,6 +29,17 @@
 #include "runtime/arguments.hpp"
 #include "runtime/deoptimization.hpp"
 
+#define JVMCI_ERROR(...)       \
+  { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return; }
+
+#define JVMCI_ERROR_(ret, ...) \
+  { Exceptions::fthrow(THREAD_AND_LOCATION, vmSymbols::jdk_vm_ci_common_JVMCIError(), __VA_ARGS__); return ret; }
+
+#define JVMCI_ERROR_0(...)    JVMCI_ERROR_(0, __VA_ARGS__)
+#define JVMCI_ERROR_NULL(...) JVMCI_ERROR_(NULL, __VA_ARGS__)
+#define JVMCI_ERROR_OK(...)   JVMCI_ERROR_(JVMCIEnv::ok, __VA_ARGS__)
+#define CHECK_OK              CHECK_(JVMCIEnv::ok)
+
 class ParseClosure : public StackObj {
   int _lineNo;
   char* _filename;
@@ -171,7 +182,7 @@
   } \
   (void)(0
 
-  static BasicType kindToBasicType(jchar ch);
+  static BasicType kindToBasicType(Handle kind, TRAPS);
 
   // The following routines are all called from compiled JVMCI code
 
--- a/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/jvmci/vmSymbols_jvmci.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -86,6 +86,7 @@
   template(jdk_vm_ci_code_VirtualObject,                          "jdk/vm/ci/code/VirtualObject")                          \
   template(jdk_vm_ci_code_RegisterSaveLayout,                     "jdk/vm/ci/code/RegisterSaveLayout")                     \
   template(jdk_vm_ci_code_InvalidInstalledCodeException,          "jdk/vm/ci/code/InvalidInstalledCodeException")          \
+  template(jdk_vm_ci_common_JVMCIError,                           "jdk/vm/ci/common/JVMCIError")                           \
   template(compileMethod_name,                                    "compileMethod")                                         \
   template(compileMethod_signature,                               "(Ljdk/vm/ci/hotspot/HotSpotResolvedJavaMethod;IJI)V")   \
   template(fromMetaspace_name,                                    "fromMetaspace")                                         \
--- a/hotspot/src/share/vm/memory/heap.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/memory/heap.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -47,7 +47,10 @@
   _freelist_segments            = 0;
   _freelist_length              = 0;
   _max_allocated_capacity       = 0;
-  _was_full                     = false;
+  _blob_count                   = 0;
+  _nmethod_count                = 0;
+  _adapter_count                = 0;
+  _full_count                   = 0;
 }
 
 
@@ -185,6 +188,7 @@
     assert(!block->free(), "must be marked free");
     DEBUG_ONLY(memset((void*)block->allocated_space(), badCodeHeapNewVal, instance_size));
     _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity());
+    _blob_count++;
     return block->allocated_space();
   }
 
@@ -198,6 +202,7 @@
     _next_segment += number_of_segments;
     DEBUG_ONLY(memset((void *)b->allocated_space(), badCodeHeapNewVal, instance_size));
     _max_allocated_capacity = MAX2(_max_allocated_capacity, allocated_capacity());
+    _blob_count++;
     return b->allocated_space();
   } else {
     return NULL;
--- a/hotspot/src/share/vm/memory/heap.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/memory/heap.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -100,7 +100,11 @@
 
   const char*  _name;                            // Name of the CodeHeap
   const int    _code_blob_type;                  // CodeBlobType it contains
-  bool         _was_full;                        // True if the code heap was full
+  int          _blob_count;                      // Number of CodeBlobs
+  int          _nmethod_count;                   // Number of nmethods
+  int          _adapter_count;                   // Number of adapters
+  int          _full_count;                      // Number of times the code heap was full
+
 
   enum { free_sentinel = 0xFF };
 
@@ -179,8 +183,13 @@
 
   // Debugging / Profiling
   const char* name() const                       { return _name; }
-  bool was_full()                                { return _was_full; }
-  void report_full()                             { _was_full = true; }
+  int         blob_count()                       { return _blob_count; }
+  int         nmethod_count()                    { return _nmethod_count; }
+  void    set_nmethod_count(int count)           {        _nmethod_count = count; }
+  int         adapter_count()                    { return _adapter_count; }
+  void    set_adapter_count(int count)           {        _adapter_count = count; }
+  int         full_count()                       { return _full_count; }
+  void        report_full()                      {        _full_count++; }
 
 private:
   size_t heap_unallocated_capacity() const;
--- a/hotspot/src/share/vm/oops/method.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/oops/method.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -579,12 +579,45 @@
 }
 
 bool Method::is_accessor() const {
+  return is_getter() || is_setter();
+}
+
+bool Method::is_getter() const {
   if (code_size() != 5) return false;
   if (size_of_parameters() != 1) return false;
-  if (java_code_at(0) != Bytecodes::_aload_0 ) return false;
+  if (java_code_at(0) != Bytecodes::_aload_0)  return false;
   if (java_code_at(1) != Bytecodes::_getfield) return false;
-  if (java_code_at(4) != Bytecodes::_areturn &&
-      java_code_at(4) != Bytecodes::_ireturn ) return false;
+  switch (java_code_at(4)) {
+    case Bytecodes::_ireturn:
+    case Bytecodes::_lreturn:
+    case Bytecodes::_freturn:
+    case Bytecodes::_dreturn:
+    case Bytecodes::_areturn:
+      break;
+    default:
+      return false;
+  }
+  return true;
+}
+
+bool Method::is_setter() const {
+  if (code_size() != 6) return false;
+  if (java_code_at(0) != Bytecodes::_aload_0) return false;
+  switch (java_code_at(1)) {
+    case Bytecodes::_iload_1:
+    case Bytecodes::_aload_1:
+    case Bytecodes::_fload_1:
+      if (size_of_parameters() != 2) return false;
+      break;
+    case Bytecodes::_dload_1:
+    case Bytecodes::_lload_1:
+      if (size_of_parameters() != 3) return false;
+      break;
+    default:
+      return false;
+  }
+  if (java_code_at(2) != Bytecodes::_putfield) return false;
+  if (java_code_at(5) != Bytecodes::_return)   return false;
   return true;
 }
 
--- a/hotspot/src/share/vm/oops/method.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/oops/method.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -595,6 +595,12 @@
   // returns true if the method is an accessor function (setter/getter).
   bool is_accessor() const;
 
+  // returns true if the method is a getter
+  bool is_getter() const;
+
+  // returns true if the method is a setter
+  bool is_setter() const;
+
   // returns true if the method does nothing but return a constant of primitive type
   bool is_constant_getter() const;
 
--- a/hotspot/src/share/vm/opto/buildOopMap.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -542,10 +542,11 @@
     if (i == cfg->number_of_blocks()) {
       break;                    // Got 'em all
     }
-#ifndef PRODUCT
-    if( PrintOpto && Verbose )
+
+    if (PrintOpto && Verbose) {
       tty->print_cr("retripping live calc");
-#endif
+    }
+
     // Force the issue (expensively): recheck everybody
     for (i = 1; i < cfg->number_of_blocks(); i++) {
       worklist->push(cfg->get_block(i));
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -186,9 +186,9 @@
           "Maximum number of unrolls for main loop")                        \
           range(0, max_jint)                                                \
                                                                             \
-  product(bool,  SuperWordLoopUnrollAnalysis, false,                        \
-          "Map number of unrolls for main loop via "                        \
-          "Superword Level Parallelism analysis")                           \
+  product_pd(bool,  SuperWordLoopUnrollAnalysis,                            \
+           "Map number of unrolls for main loop via "                       \
+           "Superword Level Parallelism analysis")                          \
                                                                             \
   notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false,                 \
           "Trace what Superword Level Parallelism analysis applies")        \
--- a/hotspot/src/share/vm/opto/callnode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -778,7 +778,7 @@
     }
     if (is_CallJava() && as_CallJava()->method() != NULL) {
       ciMethod* meth = as_CallJava()->method();
-      if (meth->is_accessor()) {
+      if (meth->is_getter()) {
         return false;
       }
       // May modify (by reflection) if an boxing object is passed
--- a/hotspot/src/share/vm/opto/cfgnode.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -270,7 +270,6 @@
   virtual uint size_of() const { return sizeof(*this); }
 
 private:
-  ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r);
   ProjNode* range_check_trap_proj() {
     int flip_test = 0;
     Node* l = NULL;
@@ -283,7 +282,7 @@
   bool is_ctrl_folds(Node* ctrl, PhaseIterGVN* igvn);
   bool has_shared_region(ProjNode* proj, ProjNode*& success, ProjNode*& fail);
   bool has_only_uncommon_traps(ProjNode* proj, ProjNode*& success, ProjNode*& fail, PhaseIterGVN* igvn);
-  static void merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
+  Node* merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
   static void improve_address_types(Node* l, Node* r, ProjNode* fail, PhaseIterGVN* igvn);
   bool is_cmp_with_loadrange(ProjNode* proj);
   bool is_null_check(ProjNode* proj, PhaseIterGVN* igvn);
@@ -292,6 +291,12 @@
   ProjNode* uncommon_trap_proj(CallStaticJavaNode*& call) const;
   bool fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn);
 
+protected:
+  ProjNode* range_check_trap_proj(int& flip, Node*& l, Node*& r);
+  Node* Ideal_common(PhaseGVN *phase, bool can_reshape);
+  Node* dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
+  Node* search_identical(int dist);
+
 public:
 
   // Degrees of branch prediction probability by order of magnitude:
@@ -375,8 +380,6 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual int required_outcnt() const { return 2; }
   virtual const RegMask &out_RegMask() const;
-  void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
-  int is_range_check(Node* &range, Node* &index, jint &offset);
   Node* fold_compares(PhaseIterGVN* phase);
   static Node* up_one_dom(Node* curr, bool linear_only = false);
 
@@ -391,6 +394,20 @@
 #endif
 };
 
+class RangeCheckNode : public IfNode {
+private:
+  int is_range_check(Node* &range, Node* &index, jint &offset);
+
+public:
+  RangeCheckNode(Node* control, Node *b, float p, float fcnt)
+    : IfNode(control, b, p, fcnt) {
+    init_class_id(Class_RangeCheck);
+  }
+
+  virtual int Opcode() const;
+  virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
 class IfProjNode : public CProjNode {
 public:
   IfProjNode(IfNode *ifnode, uint idx) : CProjNode(ifnode,idx) {}
--- a/hotspot/src/share/vm/opto/classes.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/classes.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -138,6 +138,7 @@
 macro(Halt)
 macro(HasNegatives)
 macro(If)
+macro(RangeCheck)
 macro(IfFalse)
 macro(IfTrue)
 macro(Initialize)
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/compile.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -707,7 +707,7 @@
     _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
   }
 #endif
-  set_print_inlining(directive->PrintInliningOption NOT_PRODUCT( || PrintOptoInlining));
+  set_print_inlining(directive->PrintInliningOption || PrintOptoInlining);
   set_print_intrinsics(directive->PrintIntrinsicsOption);
   set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
 
@@ -3181,6 +3181,13 @@
       n->set_req(MemBarNode::Precedent, top());
     }
     break;
+  case Op_RangeCheck: {
+    RangeCheckNode* rc = n->as_RangeCheck();
+    Node* iff = new IfNode(rc->in(0), rc->in(1), rc->_prob, rc->_fcnt);
+    n->subsume_by(iff, this);
+    frc._tests.push(iff);
+    break;
+  }
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -3189,8 +3196,9 @@
   }
 
   // Collect CFG split points
-  if (n->is_MultiBranch())
+  if (n->is_MultiBranch() && !n->is_RangeCheck()) {
     frc._tests.push(n);
+  }
 }
 
 //------------------------------final_graph_reshaping_walk---------------------
--- a/hotspot/src/share/vm/opto/doCall.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -45,7 +45,7 @@
   if (TraceTypeProfile || C->print_inlining()) {
     outputStream* out = tty;
     if (!C->print_inlining()) {
-      if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
+      if (!PrintOpto && !PrintCompilation) {
         method->print_short_name();
         tty->cr();
       }
@@ -426,12 +426,10 @@
   // uncommon-trap when callee is unloaded, uninitialized or will not link
   // bailout when too many arguments for register representation
   if (!will_link || can_not_compile_call_site(orig_callee, klass)) {
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci());
       orig_callee->print_name(); tty->cr();
     }
-#endif
     return;
   }
   assert(holder_klass->is_loaded(), "");
@@ -634,12 +632,10 @@
     // If the return type of the method is not loaded, assert that the
     // value we got is a null.  Otherwise, we need to recompile.
     if (!rtype->is_loaded()) {
-#ifndef PRODUCT
       if (PrintOpto && (Verbose || WizardMode)) {
         method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci());
         cg->method()->print_name(); tty->cr();
       }
-#endif
       if (C->log() != NULL) {
         C->log()->elem("assert_null reason='return' klass='%d'",
                        C->log()->identify(rtype));
@@ -851,11 +847,9 @@
 
     if (remaining == 1) {
       push_ex_oop(ex_node);        // Push exception oop for handler
-#ifndef PRODUCT
       if (PrintOpto && WizardMode) {
         tty->print_cr("  Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci);
       }
-#endif
       merge_exception(handler_bci); // jump to handler
       return;                   // No more handling to be done here!
     }
@@ -882,13 +876,11 @@
       assert(klass->has_subklass() || tinst->klass_is_exact(), "lost exactness");
       Node* ex_oop = _gvn.transform(new CheckCastPPNode(control(), ex_node, tinst));
       push_ex_oop(ex_oop);      // Push exception oop for handler
-#ifndef PRODUCT
       if (PrintOpto && WizardMode) {
         tty->print("  Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci);
         klass->print_name();
         tty->cr();
       }
-#endif
       merge_exception(handler_bci);
     }
     set_control(not_subtype_ctrl);
@@ -1067,13 +1059,11 @@
     // such method can be changed when its class is redefined.
     ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver);
     if (exact_method != NULL) {
-#ifndef PRODUCT
       if (PrintOpto) {
         tty->print("  Calling method via exact type @%d --- ", bci);
         exact_method->print_name();
         tty->cr();
       }
-#endif
       return exact_method;
     }
   }
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1457,7 +1457,11 @@
 // factory methods in "int adr_idx"
 Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                           int adr_idx,
-                          MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency, bool require_atomic_access) {
+                          MemNode::MemOrd mo,
+                          LoadNode::ControlDependency control_dependency,
+                          bool require_atomic_access,
+                          bool unaligned,
+                          bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
   const TypePtr* adr_type = NULL; // debug-mode-only argument
   debug_only(adr_type = C->get_adr_type(adr_idx));
@@ -1470,6 +1474,12 @@
   } else {
     ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency);
   }
+  if (unaligned) {
+    ld->as_Load()->set_unaligned_access();
+  }
+  if (mismatched) {
+    ld->as_Load()->set_mismatched_access();
+  }
   ld = _gvn.transform(ld);
   if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
     // Improve graph before escape analysis and boxing elimination.
@@ -1481,7 +1491,9 @@
 Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
                                 int adr_idx,
                                 MemNode::MemOrd mo,
-                                bool require_atomic_access) {
+                                bool require_atomic_access,
+                                bool unaligned,
+                                bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
@@ -1494,6 +1506,12 @@
   } else {
     st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
+  if (unaligned) {
+    st->as_Store()->set_unaligned_access();
+  }
+  if (mismatched) {
+    st->as_Store()->set_mismatched_access();
+  }
   st = _gvn.transform(st);
   set_memory(st, adr_idx);
   // Back-to-back stores can only remove intermediate store with DU info
@@ -1587,7 +1605,8 @@
                           const TypeOopPtr* val_type,
                           BasicType bt,
                           bool use_precise,
-                          MemNode::MemOrd mo) {
+                          MemNode::MemOrd mo,
+                          bool mismatched) {
   // Transformation of a value which could be NULL pointer (CastPP #NULL)
   // could be delayed during Parse (for example, in adjust_map_after_if()).
   // Execute transformation here to avoid barrier generation in such case.
@@ -1607,7 +1626,7 @@
               NULL /* pre_val */,
               bt);
 
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
@@ -1619,7 +1638,8 @@
                              const TypePtr* adr_type,
                              Node* val,
                              BasicType bt,
-                             MemNode::MemOrd mo) {
+                             MemNode::MemOrd mo,
+                             bool mismatched) {
   Compile::AliasType* at = C->alias_type(adr_type);
   const TypeOopPtr* val_type = NULL;
   if (adr_type->isa_instptr()) {
@@ -1638,7 +1658,7 @@
   if (val_type == NULL) {
     val_type = TypeInstPtr::BOTTOM;
   }
-  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo);
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched);
 }
 
 
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -513,23 +513,28 @@
   // of volatile fields.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false) {
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false) {
     // This version computes alias_index from bottom_type
     return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
-                     mo, control_dependency, require_atomic_access);
+                     mo, control_dependency, require_atomic_access,
+                     unaligned, mismatched);
   }
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false) {
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other make_load factory");
     return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
-                     mo, control_dependency, require_atomic_access);
+                     mo, control_dependency, require_atomic_access,
+                     unaligned, mismatched);
   }
   // This is the base version which is given an alias index.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx,
                   MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
-                  bool require_atomic_access = false);
+                  bool require_atomic_access = false, bool unaligned = false,
+                  bool mismatched = false);
 
   // Create & transform a StoreNode and store the effect into the
   // parser's memory state.
@@ -542,19 +547,24 @@
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         const TypePtr* adr_type,
                         MemNode::MemOrd mo,
-                        bool require_atomic_access = false) {
+                        bool require_atomic_access = false,
+                        bool unaligned = false,
+                        bool mismatched = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other store_to_memory factory");
     return store_to_memory(ctl, adr, val, bt,
                            C->get_alias_index(adr_type),
-                           mo, require_atomic_access);
+                           mo, require_atomic_access,
+                           unaligned, mismatched);
   }
   // This is the base version which is given alias index
   // Return the new StoreXNode
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         int adr_idx,
                         MemNode::MemOrd,
-                        bool require_atomic_access = false);
+                        bool require_atomic_access = false,
+                        bool unaligned = false,
+                        bool mismatched = false);
 
 
   // All in one pre-barrier, store, post_barrier
@@ -577,7 +587,8 @@
                   const TypeOopPtr* val_type,
                   BasicType bt,
                   bool use_precise,
-                  MemNode::MemOrd mo);
+                  MemNode::MemOrd mo,
+                  bool mismatched = false);
 
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
@@ -608,7 +619,8 @@
                              const TypePtr* adr_type,
                              Node* val,
                              BasicType bt,
-                             MemNode::MemOrd mo);
+                             MemNode::MemOrd mo,
+                             bool mismatched = false);
 
   // For the few case where the barriers need special help
   void pre_barrier(bool do_load, Node* ctl,
--- a/hotspot/src/share/vm/opto/idealKit.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/idealKit.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -368,7 +368,8 @@
 
 Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
                       int adr_idx,
-                      MemNode::MemOrd mo, bool require_atomic_access) {
+                      MemNode::MemOrd mo, bool require_atomic_access,
+                      bool mismatched) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
@@ -379,6 +380,9 @@
   } else {
     st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
+  if (mismatched) {
+    st->as_Store()->set_mismatched_access();
+  }
   st = transform(st);
   set_memory(st, adr_idx);
 
--- a/hotspot/src/share/vm/opto/idealKit.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/idealKit.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -229,7 +229,9 @@
               BasicType bt,
               int adr_idx,
               MemNode::MemOrd mo,
-              bool require_atomic_access = false);
+              bool require_atomic_access = false,
+              bool mismatched = false
+              );
 
   // Store a card mark ordered after store_oop
   Node* storeCM(Node* ctl,
--- a/hotspot/src/share/vm/opto/ifnode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/ifnode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "ci/ciTypeFlow.hpp"
 #include "memory/allocation.inline.hpp"
 #include "opto/addnode.hpp"
 #include "opto/castnode.hpp"
@@ -305,12 +306,16 @@
   Node *b_c = phase->transform(new BoolNode(cmp_c,b->_test._test));
   Node *b_x = phase->transform(new BoolNode(cmp_x,b->_test._test));
   // Make the IfNode
-  IfNode *iff_c = new IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
+  IfNode* iff_c = iff->clone()->as_If();
+  iff_c->set_req(0, region_c);
+  iff_c->set_req(1, b_c);
   igvn->set_type_bottom(iff_c);
   igvn->_worklist.push(iff_c);
   hook->init_req(2, iff_c);
 
-  IfNode *iff_x = new IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
+  IfNode* iff_x = iff->clone()->as_If();
+  iff_x->set_req(0, region_x);
+  iff_x->set_req(1, b_x);
   igvn->set_type_bottom(iff_x);
   igvn->_worklist.push(iff_x);
   hook->init_req(3, iff_x);
@@ -495,7 +500,7 @@
 // Return 0 if not a range check.  Return 1 if a range check and set index and
 // offset.  Return 2 if we had to negate the test.  Index is NULL if the check
 // is versus a constant.
-int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+int RangeCheckNode::is_range_check(Node* &range, Node* &index, jint &offset) {
   int flip_test = 0;
   Node* l = NULL;
   Node* r = NULL;
@@ -723,7 +728,7 @@
   return ctrl != NULL &&
     ctrl->is_Proj() &&
     ctrl->in(0) != NULL &&
-    ctrl->in(0)->is_If() &&
+    ctrl->in(0)->Opcode() == Op_If &&
     ctrl->in(0)->outcnt() == 2 &&
     ctrl->in(0)->as_If()->cmpi_folds(igvn) &&
     // Must compare same value
@@ -771,6 +776,11 @@
   CallStaticJavaNode* dom_unc = otherproj->is_uncommon_trap_proj(Deoptimization::Reason_none);
 
   if (otherproj->outcnt() == 1 && dom_unc != NULL) {
+    // We need to re-execute the folded Ifs after deoptimization from the merged traps
+    if (!dom_unc->jvms()->should_reexecute()) {
+      return false;
+    }
+
     CallStaticJavaNode* unc = NULL;
     ProjNode* unc_proj = uncommon_trap_proj(unc);
     if (unc_proj != NULL && unc_proj->outcnt() == 1) {
@@ -784,12 +794,37 @@
       } else if (dom_unc->in(0) != otherproj || unc->in(0) != unc_proj) {
         return false;
       }
+
+      // Different methods and methods containing jsrs are not supported.
+      ciMethod* method = unc->jvms()->method();
+      ciMethod* dom_method = dom_unc->jvms()->method();
+      if (method != dom_method || method->has_jsrs()) {
+        return false;
+      }
+      // Check that both traps are in the same activation of the method (instead
+      // of two activations being inlined through different call sites) by verifying
+      // that the call stacks are equal for both JVMStates.
+      JVMState* dom_caller = dom_unc->jvms()->caller();
+      JVMState* caller = unc->jvms()->caller();
+      if (!dom_caller->same_calls_as(caller)) {
+        return false;
+      }
+      // Check that the bci of the dominating uncommon trap dominates the bci
+      // of the dominated uncommon trap. Otherwise we may not re-execute
+      // the dominated check after deoptimization from the merged uncommon trap.
+      ciTypeFlow* flow = dom_method->get_flow_analysis();
+      int bci = unc->jvms()->bci();
+      int dom_bci = dom_unc->jvms()->bci();
+      if (!flow->is_dominated_by(bci, dom_bci)) {
+        return false;
+      }
+
       // See merge_uncommon_traps: the reason of the uncommon trap
       // will be changed and the state of the dominating If will be
       // used. Checked that we didn't apply this transformation in a
       // previous compilation and it didn't cause too many traps
-      if (!igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_unstable_fused_if) &&
-          !igvn->C->too_many_traps(dom_unc->jvms()->method(), dom_unc->jvms()->bci(), Deoptimization::Reason_range_check)) {
+      if (!igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_unstable_fused_if) &&
+          !igvn->C->too_many_traps(dom_method, dom_bci, Deoptimization::Reason_range_check)) {
         success = unc_proj;
         fail = unc_proj->other_if_proj();
         return true;
@@ -941,8 +976,8 @@
         if (failtype->_lo > failtype->_hi) {
           // previous if determines the result of this if so
           // replace Bool with constant
-          igvn->hash_delete(this);
-          set_req(1, igvn->intcon(success->_con));
+          igvn->_worklist.push(in(1));
+          igvn->replace_input_of(this, 1, igvn->intcon(success->_con));
           return true;
         }
       }
@@ -961,7 +996,8 @@
     Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
 
     igvn->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
-    set_req(1, newbool);
+    igvn->_worklist.push(in(1));
+    igvn->replace_input_of(this, 1, newbool);
 
     return true;
   }
@@ -971,7 +1007,10 @@
 // Merge the branches that trap for this If and the dominating If into
 // a single region that branches to the uncommon trap for the
 // dominating If
-void IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
+Node* IfNode::merge_uncommon_traps(ProjNode* proj, ProjNode* success, ProjNode* fail, PhaseIterGVN* igvn) {
+  Node* res = this;
+  assert(success->in(0) == this, "bad projection");
+
   ProjNode* otherproj = proj->other_if_proj();
 
   CallStaticJavaNode* unc = success->is_uncommon_trap_proj(Deoptimization::Reason_none);
@@ -1007,6 +1046,8 @@
     trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_range_check, action);
 
     improve_address_types(l, r, fail, igvn);
+
+    res = igvn->transform(new RangeCheckNode(in(0), in(1), _prob, _fcnt));
   } else if (unc != dom_unc) {
     // If we trap we won't know what CmpI would have caused the trap
     // so use a special trap reason to mark this pair of CmpI nodes as
@@ -1016,6 +1057,7 @@
     trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_unstable_fused_if, action);
   }
   igvn->replace_input_of(dom_unc, TypeFunc::Parms, igvn->intcon(trap_request));
+  return res;
 }
 
 // If we are turning 2 CmpI nodes into a CmpU that follows the pattern
@@ -1209,8 +1251,7 @@
       if (has_only_uncommon_traps(dom_cmp, success, fail, igvn) &&
           // Next call modifies graph so must be last
           fold_compares_helper(dom_cmp, success, fail, igvn)) {
-        merge_uncommon_traps(dom_cmp, success, fail, igvn);
-        return this;
+        return merge_uncommon_traps(dom_cmp, success, fail, igvn);
       }
       return NULL;
     } else if (ctrl->in(0) != NULL &&
@@ -1229,8 +1270,7 @@
           // Next call modifies graph so must be last
           fold_compares_helper(dom_cmp, success, fail, igvn)) {
         reroute_side_effect_free_unc(other_cmp, dom_cmp, igvn);
-        merge_uncommon_traps(dom_cmp, success, fail, igvn);
-        return this;
+        return merge_uncommon_traps(dom_cmp, success, fail, igvn);
       }
     }
   }
@@ -1311,14 +1351,10 @@
   jint off;
 };
 
-//------------------------------Ideal------------------------------------------
-// Return a node which is more "ideal" than the current node.  Strip out
-// control copies
-Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+Node* IfNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
   if (remove_dead_region(phase, can_reshape))  return this;
   // No Def-Use info?
   if (!can_reshape)  return NULL;
-  PhaseIterGVN *igvn = phase->is_IterGVN();
 
   // Don't bother trying to transform a dead if
   if (in(0)->is_top())  return NULL;
@@ -1334,24 +1370,291 @@
   if (idt_if != NULL)  return idt_if;
 
   // Try to split the IF
+  PhaseIterGVN *igvn = phase->is_IterGVN();
   Node *s = split_if(this, igvn);
   if (s != NULL)  return s;
 
+  return NodeSentinel;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.  Strip out
+// control copies
+Node* IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* res = Ideal_common(phase, can_reshape);
+  if (res != NodeSentinel) {
+    return res;
+  }
+
   // Check for people making a useless boolean: things like
   // if( (x < y ? true : false) ) { ... }
   // Replace with if( x < y ) { ... }
   Node *bol2 = remove_useless_bool(this, phase);
   if( bol2 ) return bol2;
 
+  if (in(0) == NULL) return NULL;     // Dead loop?
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  Node* result = fold_compares(igvn);
+  if (result != NULL) {
+    return result;
+  }
+
+  // Scan for an equivalent test
+  Node *cmp;
+  int dist = 0;               // Cutoff limit for search
+  int op = Opcode();
+  if( op == Op_If &&
+      (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
+    if( cmp->in(2) != NULL && // make sure cmp is not already dead
+        cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+      dist = 64;              // Limit for null-pointer scans
+    } else {
+      dist = 4;               // Do not bother for random pointer tests
+    }
+  } else {
+    dist = 4;                 // Limit for random junky scans
+  }
+
+  Node* prev_dom = search_identical(dist);
+
+  if (prev_dom == NULL) {
+    return NULL;
+  }
+
+  // Replace dominated IfNode
+  return dominated_by(prev_dom, igvn);
+}
+
+//------------------------------dominated_by-----------------------------------
+Node* IfNode::dominated_by(Node* prev_dom, PhaseIterGVN *igvn) {
+#ifndef PRODUCT
+  if (TraceIterativeGVN) {
+    tty->print("   Removing IfNode: "); this->dump();
+  }
+  if (VerifyOpto && !igvn->allow_progress()) {
+    // Found an equivalent dominating test,
+    // we can not guarantee reaching a fix-point for these during iterativeGVN
+    // since intervening nodes may not change.
+    return NULL;
+  }
+#endif
+
+  igvn->hash_delete(this);      // Remove self to prevent spurious V-N
+  Node *idom = in(0);
+  // Need opcode to decide which way 'this' test goes
+  int prev_op = prev_dom->Opcode();
+  Node *top = igvn->C->top(); // Shortcut to top
+
+  // Loop predicates may have depending checks which should not
+  // be skipped. For example, range check predicate has two checks
+  // for lower and upper bounds.
+  ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
+  if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL)
+   prev_dom = idom;
+
+  // Now walk the current IfNode's projections.
+  // Loop ends when 'this' has no more uses.
+  for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+    Node *ifp = last_out(i);     // Get IfTrue/IfFalse
+    igvn->add_users_to_worklist(ifp);
+    // Check which projection it is and set target.
+    // Data-target is either the dominating projection of the same type
+    // or TOP if the dominating projection is of opposite type.
+    // Data-target will be used as the new control edge for the non-CFG
+    // nodes like Casts and Loads.
+    Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
+    // Control-target is just the If's immediate dominator or TOP.
+    Node *ctrl_target = (ifp->Opcode() == prev_op) ?     idom : top;
+
+    // For each child of an IfTrue/IfFalse projection, reroute.
+    // Loop ends when projection has no more uses.
+    for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
+      Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
+      if( !s->depends_only_on_test() ) {
+        // Find the control input matching this def-use edge.
+        // For Regions it may not be in slot 0.
+        uint l;
+        for( l = 0; s->in(l) != ifp; l++ ) { }
+        igvn->replace_input_of(s, l, ctrl_target);
+      } else {                      // Else, for control producers,
+        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
+      }
+    } // End for each child of a projection
+
+    igvn->remove_dead_node(ifp);
+  } // End for each IfTrue/IfFalse child of If
+
+  // Kill the IfNode
+  igvn->remove_dead_node(this);
+
+  // Must return either the original node (now dead) or a new node
+  // (Do not return a top here, since that would break the uniqueness of top.)
+  return new ConINode(TypeInt::ZERO);
+}
+
+Node* IfNode::search_identical(int dist) {
   // Setup to scan up the CFG looking for a dominating test
-  Node *dom = in(0);
-  Node *prev_dom = this;
+  Node* dom = in(0);
+  Node* prev_dom = this;
+  int op = Opcode();
+  // Search up the dominator tree for an If with an identical test
+  while( dom->Opcode() != op    ||  // Not same opcode?
+         dom->in(1)    != in(1) ||  // Not same input 1?
+         (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
+         prev_dom->in(0) != dom ) { // One path of test does not dominate?
+    if( dist < 0 ) return NULL;
+
+    dist--;
+    prev_dom = dom;
+    dom = up_one_dom( dom );
+    if( !dom ) return NULL;
+  }
+
+  // Check that we did not follow a loop back to ourselves
+  if( this == dom )
+    return NULL;
+
+  if( dist > 2 )              // Add to count of NULL checks elided
+    explicit_null_checks_elided++;
+
+  return prev_dom;
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfProjNode::Identity(PhaseTransform *phase) {
+  // Can only optimize if cannot go the other way
+  const TypeTuple *t = phase->type(in(0))->is_tuple();
+  if (t == TypeTuple::IFNEITHER ||
+      // kill dead branch first otherwise the IfNode's control will
+      // have 2 control uses (the IfNode that doesn't go away because
+      // it still has uses and this branch of the
+      // If). Node::has_special_unique_user() will cause this node to
+      // be reprocessed once the dead branch is killed.
+      (always_taken(t) && in(0)->outcnt() == 1)) {
+    // IfNode control
+    return in(0)->in(0);
+  }
+  // no progress
+  return this;
+}
+
+#ifndef PRODUCT
+//-------------------------------related---------------------------------------
+// An IfProjNode's related node set consists of its input (an IfNode) including
+// the IfNode's condition, plus all of its outputs at level 1. In compact mode,
+// the restrictions for IfNode apply (see IfNode::rel).
+void IfProjNode::related(GrowableArray<Node*> *in_rel, GrowableArray<Node*> *out_rel, bool compact) const {
+  Node* ifNode = this->in(0);
+  in_rel->append(ifNode);
+  if (compact) {
+    ifNode->collect_nodes(in_rel, 3, false, true);
+  } else {
+    ifNode->collect_nodes_in_all_data(in_rel, false);
+  }
+  this->collect_nodes(out_rel, -1, false, false);
+}
+
+//------------------------------dump_spec--------------------------------------
+void IfNode::dump_spec(outputStream *st) const {
+  st->print("P=%f, C=%f",_prob,_fcnt);
+}
+
+//-------------------------------related---------------------------------------
+// For an IfNode, the set of related output nodes is just the output nodes till
+// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer.
+// The related input nodes contain no control nodes, but all data nodes
+// pertaining to the condition. In compact mode, the input nodes are collected
+// up to a depth of 3.
+void IfNode::related(GrowableArray <Node *> *in_rel, GrowableArray <Node *> *out_rel, bool compact) const {
+  if (compact) {
+    this->collect_nodes(in_rel, 3, false, true);
+  } else {
+    this->collect_nodes_in_all_data(in_rel, false);
+  }
+  this->collect_nodes(out_rel, -2, false, false);
+}
+#endif
+
+//------------------------------idealize_test----------------------------------
+// Try to canonicalize tests better.  Peek at the Cmp/Bool/If sequence and
+// come up with a canonical sequence.  Bools getting 'eq', 'gt' and 'ge' forms
+// converted to 'ne', 'le' and 'lt' forms.  IfTrue/IfFalse get swapped as
+// needed.
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
+  assert(iff->in(0) != NULL, "If must be live");
+
+  if (iff->outcnt() != 2)  return NULL; // Malformed projections.
+  Node* old_if_f = iff->proj_out(false);
+  Node* old_if_t = iff->proj_out(true);
+
+  // CountedLoopEnds want the back-control test to be TRUE, irregardless of
+  // whether they are testing a 'gt' or 'lt' condition.  The 'gt' condition
+  // happens in count-down loops
+  if (iff->is_CountedLoopEnd())  return NULL;
+  if (!iff->in(1)->is_Bool())  return NULL; // Happens for partially optimized IF tests
+  BoolNode *b = iff->in(1)->as_Bool();
+  BoolTest bt = b->_test;
+  // Test already in good order?
+  if( bt.is_canonical() )
+    return NULL;
+
+  // Flip test to be canonical.  Requires flipping the IfFalse/IfTrue and
+  // cloning the IfNode.
+  Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) );
+  if( !new_b->is_Bool() ) return NULL;
+  b = new_b->as_Bool();
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  assert( igvn, "Test is not canonical in parser?" );
+
+  // The IF node never really changes, but it needs to be cloned
+  iff = iff->clone()->as_If();
+  iff->set_req(1, b);
+  iff->_prob = 1.0-iff->_prob;
+
+  Node *prior = igvn->hash_find_insert(iff);
+  if( prior ) {
+    igvn->remove_dead_node(iff);
+    iff = (IfNode*)prior;
+  } else {
+    // Cannot call transform on it just yet
+    igvn->set_type_bottom(iff);
+  }
+  igvn->_worklist.push(iff);
+
+  // Now handle projections.  Cloning not required.
+  Node* new_if_f = (Node*)(new IfFalseNode( iff ));
+  Node* new_if_t = (Node*)(new IfTrueNode ( iff ));
+
+  igvn->register_new_node_with_optimizer(new_if_f);
+  igvn->register_new_node_with_optimizer(new_if_t);
+  // Flip test, so flip trailing control
+  igvn->replace_node(old_if_f, new_if_t);
+  igvn->replace_node(old_if_t, new_if_f);
+
+  // Progress
+  return iff;
+}
+
+Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  Node* res = Ideal_common(phase, can_reshape);
+  if (res != NodeSentinel) {
+    return res;
+  }
+
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  // Setup to scan up the CFG looking for a dominating test
+  Node* prev_dom = this;
 
   // Check for range-check vs other kinds of tests
-  Node *index1, *range1;
+  Node* index1;
+  Node* range1;
   jint offset1;
   int flip1 = is_range_check(range1, index1, offset1);
-  if( flip1 ) {
+  if (flip1) {
+    Node* dom = in(0);
     // Try to remove extra range checks.  All 'up_one_dom' gives up at merges
     // so all checks we inspect post-dominate the top-most check we find.
     // If we are going to fail the current check and we reach the top check
@@ -1372,13 +1675,14 @@
 
     // Scan for the top checks and collect range of offsets
     for (int dist = 0; dist < 999; dist++) { // Range-Check scan limit
-      if (dom->Opcode() == Op_If &&  // Not same opcode?
+      if (dom->Opcode() == Op_RangeCheck &&  // Not same opcode?
           prev_dom->in(0) == dom) { // One path of test does dominate?
         if (dom == this) return NULL; // dead loop
         // See if this is a range check
-        Node *index2, *range2;
+        Node* index2;
+        Node* range2;
         jint offset2;
-        int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
+        int flip2 = dom->as_RangeCheck()->is_range_check(range2, index2, offset2);
         // See if this is a _matching_ range check, checking against
         // the same array bounds.
         if (flip2 == flip1 && range2 == range1 && index2 == index1 &&
@@ -1486,237 +1790,14 @@
         prev_dom = rc0.ctl;
       }
     }
-
-  } else {                      // Scan for an equivalent test
-
-    Node *cmp;
-    int dist = 0;               // Cutoff limit for search
-    int op = Opcode();
-    if( op == Op_If &&
-        (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
-      if( cmp->in(2) != NULL && // make sure cmp is not already dead
-          cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
-        dist = 64;              // Limit for null-pointer scans
-      } else {
-        dist = 4;               // Do not bother for random pointer tests
-      }
-    } else {
-      dist = 4;                 // Limit for random junky scans
-    }
-
-    // Normal equivalent-test check.
-    if( !dom ) return NULL;     // Dead loop?
-
-    Node* result = fold_compares(igvn);
-    if (result != NULL) {
-      return result;
-    }
+  } else {
+    prev_dom = search_identical(4);
 
-    // Search up the dominator tree for an If with an identical test
-    while( dom->Opcode() != op    ||  // Not same opcode?
-           dom->in(1)    != in(1) ||  // Not same input 1?
-           (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
-           prev_dom->in(0) != dom ) { // One path of test does not dominate?
-      if( dist < 0 ) return NULL;
-
-      dist--;
-      prev_dom = dom;
-      dom = up_one_dom( dom );
-      if( !dom ) return NULL;
-    }
-
-    // Check that we did not follow a loop back to ourselves
-    if( this == dom )
+    if (prev_dom == NULL) {
       return NULL;
-
-    if( dist > 2 )              // Add to count of NULL checks elided
-      explicit_null_checks_elided++;
-
-  } // End of Else scan for an equivalent test
-
-  // Hit!  Remove this IF
-#ifndef PRODUCT
-  if( TraceIterativeGVN ) {
-    tty->print("   Removing IfNode: "); this->dump();
+    }
   }
-  if( VerifyOpto && !phase->allow_progress() ) {
-    // Found an equivalent dominating test,
-    // we can not guarantee reaching a fix-point for these during iterativeGVN
-    // since intervening nodes may not change.
-    return NULL;
-  }
-#endif
 
   // Replace dominated IfNode
-  dominated_by( prev_dom, igvn );
-
-  // Must return either the original node (now dead) or a new node
-  // (Do not return a top here, since that would break the uniqueness of top.)
-  return new ConINode(TypeInt::ZERO);
-}
-
-//------------------------------dominated_by-----------------------------------
-void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
-  igvn->hash_delete(this);      // Remove self to prevent spurious V-N
-  Node *idom = in(0);
-  // Need opcode to decide which way 'this' test goes
-  int prev_op = prev_dom->Opcode();
-  Node *top = igvn->C->top(); // Shortcut to top
-
-  // Loop predicates may have depending checks which should not
-  // be skipped. For example, range check predicate has two checks
-  // for lower and upper bounds.
-  ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
-  if (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL)
-   prev_dom = idom;
-
-  // Now walk the current IfNode's projections.
-  // Loop ends when 'this' has no more uses.
-  for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
-    Node *ifp = last_out(i);     // Get IfTrue/IfFalse
-    igvn->add_users_to_worklist(ifp);
-    // Check which projection it is and set target.
-    // Data-target is either the dominating projection of the same type
-    // or TOP if the dominating projection is of opposite type.
-    // Data-target will be used as the new control edge for the non-CFG
-    // nodes like Casts and Loads.
-    Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
-    // Control-target is just the If's immediate dominator or TOP.
-    Node *ctrl_target = (ifp->Opcode() == prev_op) ?     idom : top;
-
-    // For each child of an IfTrue/IfFalse projection, reroute.
-    // Loop ends when projection has no more uses.
-    for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
-      Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
-      if( !s->depends_only_on_test() ) {
-        // Find the control input matching this def-use edge.
-        // For Regions it may not be in slot 0.
-        uint l;
-        for( l = 0; s->in(l) != ifp; l++ ) { }
-        igvn->replace_input_of(s, l, ctrl_target);
-      } else {                      // Else, for control producers,
-        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
-      }
-    } // End for each child of a projection
-
-    igvn->remove_dead_node(ifp);
-  } // End for each IfTrue/IfFalse child of If
-
-  // Kill the IfNode
-  igvn->remove_dead_node(this);
-}
-
-//------------------------------Identity---------------------------------------
-// If the test is constant & we match, then we are the input Control
-Node *IfProjNode::Identity(PhaseTransform *phase) {
-  // Can only optimize if cannot go the other way
-  const TypeTuple *t = phase->type(in(0))->is_tuple();
-  if (t == TypeTuple::IFNEITHER ||
-      // kill dead branch first otherwise the IfNode's control will
-      // have 2 control uses (the IfNode that doesn't go away because
-      // it still has uses and this branch of the
-      // If). Node::has_special_unique_user() will cause this node to
-      // be reprocessed once the dead branch is killed.
-      (always_taken(t) && in(0)->outcnt() == 1)) {
-    // IfNode control
-    return in(0)->in(0);
-  }
-  // no progress
-  return this;
+  return dominated_by(prev_dom, igvn);
 }
-
-#ifndef PRODUCT
-//-------------------------------related---------------------------------------
-// An IfProjNode's related node set consists of its input (an IfNode) including
-// the IfNode's condition, plus all of its outputs at level 1. In compact mode,
-// the restrictions for IfNode apply (see IfNode::rel).
-void IfProjNode::related(GrowableArray<Node*> *in_rel, GrowableArray<Node*> *out_rel, bool compact) const {
-  Node* ifNode = this->in(0);
-  in_rel->append(ifNode);
-  if (compact) {
-    ifNode->collect_nodes(in_rel, 3, false, true);
-  } else {
-    ifNode->collect_nodes_in_all_data(in_rel, false);
-  }
-  this->collect_nodes(out_rel, -1, false, false);
-}
-
-//------------------------------dump_spec--------------------------------------
-void IfNode::dump_spec(outputStream *st) const {
-  st->print("P=%f, C=%f",_prob,_fcnt);
-}
-
-//-------------------------------related---------------------------------------
-// For an IfNode, the set of related output nodes is just the output nodes till
-// depth 2, i.e, the IfTrue/IfFalse projection nodes plus the nodes they refer.
-// The related input nodes contain no control nodes, but all data nodes
-// pertaining to the condition. In compact mode, the input nodes are collected
-// up to a depth of 3.
-void IfNode::related(GrowableArray <Node *> *in_rel, GrowableArray <Node *> *out_rel, bool compact) const {
-  if (compact) {
-    this->collect_nodes(in_rel, 3, false, true);
-  } else {
-    this->collect_nodes_in_all_data(in_rel, false);
-  }
-  this->collect_nodes(out_rel, -2, false, false);
-}
-#endif
-
-//------------------------------idealize_test----------------------------------
-// Try to canonicalize tests better.  Peek at the Cmp/Bool/If sequence and
-// come up with a canonical sequence.  Bools getting 'eq', 'gt' and 'ge' forms
-// converted to 'ne', 'le' and 'lt' forms.  IfTrue/IfFalse get swapped as
-// needed.
-static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
-  assert(iff->in(0) != NULL, "If must be live");
-
-  if (iff->outcnt() != 2)  return NULL; // Malformed projections.
-  Node* old_if_f = iff->proj_out(false);
-  Node* old_if_t = iff->proj_out(true);
-
-  // CountedLoopEnds want the back-control test to be TRUE, irregardless of
-  // whether they are testing a 'gt' or 'lt' condition.  The 'gt' condition
-  // happens in count-down loops
-  if (iff->is_CountedLoopEnd())  return NULL;
-  if (!iff->in(1)->is_Bool())  return NULL; // Happens for partially optimized IF tests
-  BoolNode *b = iff->in(1)->as_Bool();
-  BoolTest bt = b->_test;
-  // Test already in good order?
-  if( bt.is_canonical() )
-    return NULL;
-
-  // Flip test to be canonical.  Requires flipping the IfFalse/IfTrue and
-  // cloning the IfNode.
-  Node* new_b = phase->transform( new BoolNode(b->in(1), bt.negate()) );
-  if( !new_b->is_Bool() ) return NULL;
-  b = new_b->as_Bool();
-
-  PhaseIterGVN *igvn = phase->is_IterGVN();
-  assert( igvn, "Test is not canonical in parser?" );
-
-  // The IF node never really changes, but it needs to be cloned
-  iff = new IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);
-
-  Node *prior = igvn->hash_find_insert(iff);
-  if( prior ) {
-    igvn->remove_dead_node(iff);
-    iff = (IfNode*)prior;
-  } else {
-    // Cannot call transform on it just yet
-    igvn->set_type_bottom(iff);
-  }
-  igvn->_worklist.push(iff);
-
-  // Now handle projections.  Cloning not required.
-  Node* new_if_f = (Node*)(new IfFalseNode( iff ));
-  Node* new_if_t = (Node*)(new IfTrueNode ( iff ));
-
-  igvn->register_new_node_with_optimizer(new_if_f);
-  igvn->register_new_node_with_optimizer(new_if_t);
-  // Flip test, so flip trailing control
-  igvn->replace_node(old_if_f, new_if_t);
-  igvn->replace_node(old_if_t, new_if_f);
-
-  // Progress
-  return iff;
-}
--- a/hotspot/src/share/vm/opto/library_call.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -238,7 +238,7 @@
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
   void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
-  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
+  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned);
   static bool klass_needs_init_guard(Node* kls);
   bool inline_unsafe_allocate();
   bool inline_unsafe_copyMemory();
@@ -544,72 +544,72 @@
   case vmIntrinsics::_inflateStringC:
   case vmIntrinsics::_inflateStringB:           return inline_string_copy(!is_compress);
 
-  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile);
-  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile);
-  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile);
-  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile);
-  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
-
-  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile);
-
-  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile);
-  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile);
-  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
-  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
-  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile);
-
-  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile);
-  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile);
-  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile);
-  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile);
-  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile);
-  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile);
-  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile);
-  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile);
-  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile);
-
-  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile);
-  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile);
-  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile);
-  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile);
-  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile);
-  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile);
-  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile);
-  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile);
-  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile);
-
-  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
-
-  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile);
-  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile);
-  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile);
-  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile, false);
+  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false);
+  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile, false);
+  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile, false);
+  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
+
+  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false);
+
+  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
+  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
+  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
+  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile, false);
+  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
+  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
+  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
+  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile, false);
+
+  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile, false);
+  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile, false);
+  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile, false);
+  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile, false);
+  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile, false);
+  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile, false);
+  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile, false);
+  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile, false);
+  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile, false);
+
+  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile, false);
+  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile, false);
+  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile, false);
+  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile, false);
+  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile, false);
+  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile, false);
+  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile, false);
+  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile, false);
+  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile, false);
+
+  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, true);
+  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, true);
+  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, true);
+  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, true);
+
+  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, true);
+  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, true);
+  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, true);
+  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, true);
 
   case vmIntrinsics::_compareAndSwapObject:     return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
   case vmIntrinsics::_compareAndSwapInt:        return inline_unsafe_load_store(T_INT,    LS_cmpxchg);
@@ -2385,7 +2385,7 @@
   return NULL;
 }
 
-bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
+bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) {
   if (callee()->is_static())  return false;  // caller must have the capability!
 
 #ifndef PRODUCT
@@ -2527,7 +2527,24 @@
   // of safe & unsafe memory.
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
 
-   if (!is_store) {
+  assert(alias_type->adr_type() == TypeRawPtr::BOTTOM || alias_type->adr_type() == TypeOopPtr::BOTTOM ||
+         alias_type->field() != NULL || alias_type->element() != NULL, "field, array element or unknown");
+  bool mismatched = false;
+  if (alias_type->element() != NULL || alias_type->field() != NULL) {
+    BasicType bt;
+    if (alias_type->element() != NULL) {
+      const Type* element = alias_type->element();
+      bt = element->isa_narrowoop() ? T_OBJECT : element->array_element_basic_type();
+    } else {
+      bt = alias_type->field()->type()->basic_type();
+    }
+    if (bt != type) {
+      mismatched = true;
+    }
+  }
+  assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");
+
+  if (!is_store) {
     Node* p = NULL;
     // Try to constant fold a load from a constant field
     ciField* field = alias_type->field();
@@ -2543,7 +2560,7 @@
       MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
       // To be valid, unsafe loads may depend on other conditions than
       // the one that guards them: pin the Load node
-      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile);
+      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched);
       // load value
       switch (type) {
       case T_BOOLEAN:
@@ -2590,12 +2607,12 @@
 
     MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
     if (type != T_OBJECT ) {
-      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile);
+      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched);
     } else {
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
         // oop to Java heap.
-        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
+        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
       } else {
         // We can't tell at compile time if we are storing in the Java heap or outside
         // of it. So we need to emit code to conditionally do the proper type of
@@ -2607,11 +2624,11 @@
         __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
           // Sync IdealKit and graphKit.
           sync_kit(ideal);
-          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo);
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
           // Update IdealKit memory.
           __ sync_kit(this);
         } __ else_(); {
-          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile);
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile, mismatched);
         } __ end_if();
         // Final sync IdealKit and GraphKit.
         final_sync(ideal);
--- a/hotspot/src/share/vm/opto/loopPredicate.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopPredicate.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -91,7 +91,8 @@
 // The true projecttion (if_cont) of the new_iff is returned.
 // This code is also used to clone predicates to cloned loops.
 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                                      Deoptimization::DeoptReason reason) {
+                                                      Deoptimization::DeoptReason reason,
+                                                      int opcode) {
   assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!");
   IfNode* iff = cont_proj->in(0)->as_If();
 
@@ -133,8 +134,13 @@
   }
   // Create new_iff
   IdealLoopTree* lp = get_loop(entry);
-  IfNode *new_iff = iff->clone()->as_If();
-  new_iff->set_req(0, entry);
+  IfNode* new_iff = NULL;
+  if (opcode == Op_If) {
+    new_iff = new IfNode(entry, iff->in(1), iff->_prob, iff->_fcnt);
+  } else {
+    assert(opcode == Op_RangeCheck, "no other if variant here");
+    new_iff = new RangeCheckNode(entry, iff->in(1), iff->_prob, iff->_fcnt);
+  }
   register_control(new_iff, lp, entry);
   Node *if_cont = new IfTrueNode(new_iff);
   Node *if_uct  = new IfFalseNode(new_iff);
@@ -183,7 +189,8 @@
 //------------------------------create_new_if_for_predicate------------------------
 // Create a new if below new_entry for the predicate to be cloned (IGVN optimization)
 ProjNode* PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                                    Deoptimization::DeoptReason reason) {
+                                                    Deoptimization::DeoptReason reason,
+                                                    int opcode) {
   assert(new_entry != 0, "only used for clone predicate");
   assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!");
   IfNode* iff = cont_proj->in(0)->as_If();
@@ -208,8 +215,13 @@
   }
 
   // Create new_iff in new location.
-  IfNode *new_iff = iff->clone()->as_If();
-  new_iff->set_req(0, new_entry);
+  IfNode* new_iff = NULL;
+  if (opcode == Op_If) {
+    new_iff = new IfNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt);
+  } else {
+    assert(opcode == Op_RangeCheck, "no other if variant here");
+    new_iff = new RangeCheckNode(new_entry, iff->in(1), iff->_prob, iff->_fcnt);
+  }
 
   register_new_node_with_optimizer(new_iff);
   Node *if_cont = new IfTrueNode(new_iff);
@@ -249,9 +261,9 @@
                                           PhaseIterGVN* igvn) {
   ProjNode* new_predicate_proj;
   if (loop_phase != NULL) {
-    new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason);
+    new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If);
   } else {
-    new_predicate_proj =       igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason);
+    new_predicate_proj =       igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason, Op_If);
   }
   IfNode* iff = new_predicate_proj->in(0)->as_If();
   Node* ctrl  = iff->in(0);
@@ -714,7 +726,8 @@
   while (current_proj != head) {
     if (loop == get_loop(current_proj) && // still in the loop ?
         current_proj->is_Proj()        && // is a projection  ?
-        current_proj->in(0)->Opcode() == Op_If) { // is a if projection ?
+        (current_proj->in(0)->Opcode() == Op_If ||
+         current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
       if_proj_list.push(current_proj);
     }
     current_proj = idom(current_proj);
@@ -753,7 +766,8 @@
     if (invar.is_invariant(bol)) {
       // Invariant test
       new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
-                                                       Deoptimization::Reason_predicate);
+                                                       Deoptimization::Reason_predicate,
+                                                       iff->Opcode());
       Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
       BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
 
@@ -797,8 +811,8 @@
       // lower_bound test will dominate the upper bound test and all
       // cloned or created nodes will use the lower bound test as
       // their declared control.
-      ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate);
-      ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate);
+      ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode());
+      ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, iff->Opcode());
       assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
       Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0);
 
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -290,7 +290,7 @@
       if (ctrl->is_top())
         return false;           // Found dead test on live IF?  No peeling!
       // Standard IF only has one input value to check for loop invariance
-      assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+      assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
       // Condition is not a member of this loop?
       if( !is_member(phase->get_loop(ctrl)) &&
           is_loop_exit(test) )
@@ -792,8 +792,10 @@
     return false;
   }
 
-  if(cl->do_unroll_only()) {
-    NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct));
+  if (cl->do_unroll_only()) {
+    if (TraceSuperWordLoopUnrollAnalysis) {
+      tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_ct);
+    }
   }
 
   // Unroll once!  (Each trip will soon do double iterations)
@@ -818,7 +820,9 @@
       if (slp_max_unroll_factor >= future_unroll_ct) {
         int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
         if (new_limit > LoopUnrollLimit) {
-          NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit));
+          if (TraceSuperWordLoopUnrollAnalysis) {
+            tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
+          }
           _local_loop_unroll_limit = new_limit;
         }
       }
@@ -856,7 +860,8 @@
   // loop-invariant.
   for (uint i = 0; i < _body.size(); i++) {
     Node *iff = _body[i];
-    if (iff->Opcode() == Op_If) { // Test?
+    if (iff->Opcode() == Op_If ||
+        iff->Opcode() == Op_RangeCheck) { // Test?
 
       // Comparing trip+off vs limit
       Node *bol = iff->in(1);
@@ -2035,8 +2040,8 @@
   // loop-invariant.
   for( uint i = 0; i < loop->_body.size(); i++ ) {
     Node *iff = loop->_body[i];
-    if( iff->Opcode() == Op_If ) { // Test?
-
+    if (iff->Opcode() == Op_If ||
+        iff->Opcode() == Op_RangeCheck) { // Test?
       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
       // we need loop unswitching instead of iteration splitting.
       Node *exit = loop->is_loop_exit(iff);
@@ -2119,10 +2124,9 @@
             conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
           }
         } else {
-#ifndef PRODUCT
-          if( PrintOpto )
+          if (PrintOpto) {
             tty->print_cr("missed RCE opportunity");
-#endif
+          }
           continue;             // In release mode, ignore it
         }
       } else {                  // Otherwise work on normal compares
@@ -2157,10 +2161,9 @@
           }
           break;
         default:
-#ifndef PRODUCT
-          if( PrintOpto )
+          if (PrintOpto) {
             tty->print_cr("missed RCE opportunity");
-#endif
+          }
           continue;             // Unhandled case
         }
       }
@@ -2504,9 +2507,7 @@
       return false;
     }
     if (should_peel) {            // Should we peel?
-#ifndef PRODUCT
-      if (PrintOpto) tty->print_cr("should_peel");
-#endif
+      if (PrintOpto) { tty->print_cr("should_peel"); }
       phase->do_peeling(this,old_new);
     } else if (should_unswitch) {
       phase->do_unswitching(this, old_new);
--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -132,7 +132,7 @@
     head->as_CountedLoop()->set_normal_loop();
   }
 
-  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);
+  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode());
 
 #ifdef ASSERT
   Node* uniqc = proj_true->unique_ctrl_out();
@@ -222,7 +222,8 @@
 // and inserting an if to select fast-slow versions.
 // Return control projection of the entry to the fast version.
 ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
-                                                      Node_List &old_new) {
+                                                      Node_List &old_new,
+                                                      int opcode) {
   LoopNode* head  = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
   Node*     entry = head->in(LoopNode::EntryControl);
@@ -235,7 +236,8 @@
   register_node(opq, outer_loop, entry, dom_depth(entry));
   Node *bol       = new Conv2BNode(opq);
   register_node(bol, outer_loop, entry, dom_depth(entry));
-  IfNode* iff = new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
+  IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) :
+    new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
   register_node(iff, outer_loop, entry, dom_depth(entry));
   ProjNode* iffast = new IfTrueNode(iff);
   register_node(iffast, outer_loop, iff, dom_depth(iff));
@@ -359,16 +361,22 @@
   }
 
   if(!_lpt->_head->is_CountedLoop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);
+    }
     return false;
   }
   CountedLoopNode *cl = _lpt->_head->as_CountedLoop();
   if (!cl->is_valid_counted_loop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);
+    }
     return false; // skip malformed counted loop
   }
   if (!cl->is_main_loop()) {
-    NOT_PRODUCT(if(TraceLoopOpts) {tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);})
+    if (TraceLoopOpts) {
+      tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);
+    }
     return false; // skip normal, pre, and post loops
   }
 
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -2397,11 +2397,9 @@
   // After that switch predicates off and do more loop optimizations.
   if (!C->major_progress() && (C->predicate_count() > 0)) {
      C->cleanup_loop_predicates(_igvn);
-#ifndef PRODUCT
      if (TraceLoopOpts) {
        tty->print_cr("PredicatesOff");
      }
-#endif
      C->set_major_progress();
   }
 
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -916,7 +916,8 @@
 
   // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
   ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                        Deoptimization::DeoptReason reason);
+                                        Deoptimization::DeoptReason reason,
+                                        int opcode);
   void register_control(Node* n, IdealLoopTree *loop, Node* pred);
 
   // Clone loop predicates to cloned loops (peeled, unswitched)
@@ -966,7 +967,8 @@
   // Create a slow version of the loop by cloning the loop
   // and inserting an if to select fast-slow versions.
   ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
-                                        Node_List &old_new);
+                                        Node_List &old_new,
+                                        int opcode);
 
   // Clone a loop and return the clone head (clone_loop_head).
   // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -199,14 +199,11 @@
 // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 // live path up to the dominating control.
 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
-#ifndef PRODUCT
-  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
-#endif
-
+  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
 
   // prevdom is the dominating projection of the dominating test.
   assert( iff->is_If(), "" );
-  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+  assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
   int pop = prevdom->Opcode();
   assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
   if (flip) {
@@ -617,9 +614,7 @@
       }
     }
     if (phi == NULL)  break;
-#ifndef PRODUCT
-    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
-#endif
+    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
     // Move speculative ops
     for (uint j = 1; j < region->req(); j++) {
       Node *proj = region->in(j);
@@ -963,10 +958,9 @@
   }
   int nodes_left = C->max_node_limit() - C->live_nodes();
   if (weight * 8 > nodes_left) {
-#ifndef PRODUCT
-    if (PrintOpto)
+    if (PrintOpto) {
       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
-#endif
+    }
     return true;
   } else {
     return false;
@@ -1123,7 +1117,8 @@
   int n_op = n->Opcode();
 
   // Check for an IF being dominated by another IF same test
-  if (n_op == Op_If) {
+  if (n_op == Op_If ||
+      n_op == Op_RangeCheck) {
     Node *bol = n->in(1);
     uint max = bol->outcnt();
     // Check for same test used more than once?
@@ -1489,14 +1484,12 @@
 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                  Node* side_by_side_idom) {
 
-#ifndef PRODUCT
   if (C->do_vector_loop() && PrintOpto) {
     const char* mname = C->method()->name()->as_quoted_ascii();
     if (mname != NULL) {
       tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
     }
   }
-#endif
 
   CloneMap& cm = C->clone_map();
   Dict* dict = cm.dict();
@@ -1945,7 +1938,10 @@
   BoolNode* bol = new BoolNode(cmp, relop);
   register_node(bol, loop, proj2, ddepth);
 
-  IfNode* new_if = new IfNode(proj2, bol, iff->_prob, iff->_fcnt);
+  int opcode = iff->Opcode();
+  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
+  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
+    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
   register_node(new_if, loop, proj2, ddepth);
 
   proj->set_req(0, new_if); // reattach
--- a/hotspot/src/share/vm/opto/matcher.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -1569,13 +1569,11 @@
         // Can NOT include the match of a subtree when its memory state
         // is used by any of the other subtrees
         (input_mem == NodeSentinel) ) {
-#ifndef PRODUCT
       // Print when we exclude matching due to different memory states at input-loads
-      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
-        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
+      if (PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
+        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem)) {
         tty->print_cr("invalid input_mem");
       }
-#endif
       // Switch to a register-only opcode; this value must be in a register
       // and cannot be subsumed as part of a larger instruction.
       s->DFA( m->ideal_reg(), m );
--- a/hotspot/src/share/vm/opto/matcher.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -269,6 +269,10 @@
   // should generate this one.
   static const bool match_rule_supported(int opcode);
 
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  static const bool match_rule_supported_vector(int opcode, int vlen);
+
   // Some uarchs have different sized float register resources
   static const int float_pressure(int default_pressure_threshold);
 
--- a/hotspot/src/share/vm/opto/memnode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -72,8 +72,15 @@
   dump_adr_type(this, _adr_type, st);
 
   Compile* C = Compile::current();
-  if( C->alias_type(_adr_type)->is_volatile() )
+  if (C->alias_type(_adr_type)->is_volatile()) {
     st->print(" Volatile!");
+  }
+  if (_unaligned_access) {
+    st->print(" unaligned");
+  }
+  if (_mismatched_access) {
+    st->print(" mismatched");
+  }
 }
 
 void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
@@ -2393,7 +2400,8 @@
              st->Opcode() == Op_StoreVector ||
              Opcode() == Op_StoreVector ||
              phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
-             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode
+             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
+             (is_mismatched_access() || st->as_Store()->is_mismatched_access()),
              "no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]);
 
       if (st->in(MemNode::Address)->eqv_uncast(address) &&
@@ -3213,6 +3221,9 @@
 // within the initialized memory.
 intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) {
   const int FAIL = 0;
+  if (st->is_unaligned_access()) {
+    return FAIL;
+  }
   if (st->req() != MemNode::ValueIn + 1)
     return FAIL;                // an inscrutable StoreNode (card mark?)
   Node* ctl = st->in(MemNode::Control);
--- a/hotspot/src/share/vm/opto/memnode.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -39,11 +39,14 @@
 //------------------------------MemNode----------------------------------------
 // Load or Store, possibly throwing a NULL pointer exception
 class MemNode : public Node {
+private:
+  bool _unaligned_access; // Unaligned access from unsafe
+  bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance
 protected:
 #ifdef ASSERT
   const TypePtr* _adr_type;     // What kind of memory is being addressed?
 #endif
-  virtual uint size_of() const; // Size is bigger (ASSERT only)
+  virtual uint size_of() const;
 public:
   enum { Control,               // When is it safe to do this load?
          Memory,                // Chunk of memory is being loaded from
@@ -57,17 +60,17 @@
   } MemOrd;
 protected:
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
-    : Node(c0,c1,c2   ) {
+    : Node(c0,c1,c2   ), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
-    : Node(c0,c1,c2,c3) {
+    : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
-    : Node(c0,c1,c2,c3,c4) {
+    : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false) {
     init_class_id(Class_Mem);
     debug_only(_adr_type=at; adr_type();)
   }
@@ -127,6 +130,11 @@
   // the given memory state?  (The state may or may not be in(Memory).)
   Node* can_see_stored_value(Node* st, PhaseTransform* phase) const;
 
+  void set_unaligned_access() { _unaligned_access = true; }
+  bool is_unaligned_access() const { return _unaligned_access; }
+  void set_mismatched_access() { _mismatched_access = true; }
+  bool is_mismatched_access() const { return _mismatched_access; }
+
 #ifndef PRODUCT
   static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st);
   virtual void dump_spec(outputStream *st) const;
--- a/hotspot/src/share/vm/opto/movenode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/movenode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -230,9 +230,7 @@
 
   // Convert to a bool (flipped)
   // Build int->bool conversion
-#ifndef PRODUCT
-  if( PrintOpto ) tty->print_cr("CMOV to I2B");
-#endif
+  if (PrintOpto) { tty->print_cr("CMOV to I2B"); }
   Node *n = new Conv2BNode( cmp->in(1) );
   if( flip )
   n = new XorINode( phase->transform(n), phase->intcon(1) );
--- a/hotspot/src/share/vm/opto/multnode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/multnode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -44,14 +44,14 @@
 //------------------------------proj_out---------------------------------------
 // Get a named projection
 ProjNode* MultiNode::proj_out(uint which_proj) const {
-  assert(Opcode() != Op_If || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
-  assert(Opcode() != Op_If || outcnt() == 2, "bad if #1");
+  assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
+  assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || outcnt() == 2, "bad if #1");
   for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
     Node *p = fast_out(i);
     if (p->is_Proj()) {
       ProjNode *proj = p->as_Proj();
       if (proj->_con == which_proj) {
-        assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
+        assert((Opcode() != Op_If && Opcode() != Op_RangeCheck) || proj->Opcode() == (which_proj ? Op_IfTrue : Op_IfFalse), "bad if #2");
         return proj;
       }
     } else {
--- a/hotspot/src/share/vm/opto/node.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/node.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -125,6 +125,7 @@
 class PhiNode;
 class Pipeline;
 class ProjNode;
+class RangeCheckNode;
 class RegMask;
 class RegionNode;
 class RootNode;
@@ -584,6 +585,7 @@
           DEFINE_CLASS_ID(Jump,        PCTable, 1)
         DEFINE_CLASS_ID(If,          MultiBranch, 1)
           DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
+          DEFINE_CLASS_ID(RangeCheck, If, 1)
         DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
       DEFINE_CLASS_ID(Start,       Multi, 2)
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
@@ -758,6 +760,7 @@
   DEFINE_CLASS_QUERY(FastLock)
   DEFINE_CLASS_QUERY(FastUnlock)
   DEFINE_CLASS_QUERY(If)
+  DEFINE_CLASS_QUERY(RangeCheck)
   DEFINE_CLASS_QUERY(IfFalse)
   DEFINE_CLASS_QUERY(IfTrue)
   DEFINE_CLASS_QUERY(Initialize)
--- a/hotspot/src/share/vm/opto/output.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/output.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -91,13 +91,10 @@
   }
 
   // Break before main entry point
-  if( (_method && C->directive()->BreakAtExecuteOption)
-#ifndef PRODUCT
-    ||(OptoBreakpoint && is_method_compilation())
-    ||(OptoBreakpointOSR && is_osr_compilation())
-    ||(OptoBreakpointC2R && !_method)
-#endif
-    ) {
+  if ((_method && C->directive()->BreakAtExecuteOption) ||
+      (OptoBreakpoint && is_method_compilation())       ||
+      (OptoBreakpointOSR && is_osr_compilation())       ||
+      (OptoBreakpointC2R && !_method)                   ) {
     // checking for _method means that OptoBreakpoint does not apply to
     // runtime stubs or frame converters
     _cfg->insert( entry, 1, new MachBreakpointNode() );
--- a/hotspot/src/share/vm/opto/parse1.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -958,12 +958,10 @@
            PPC64_ONLY(wrote_volatile() ||)
            (AlwaysSafeConstructors && wrote_fields()))) {
     _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name();
       tty->print_cr(" writes finals and needs a memory barrier");
     }
-#endif
   }
 
   // Any method can write a @Stable field; insert memory barriers after
@@ -971,12 +969,10 @@
   // barrier there.
   if (wrote_stable()) {
     _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name();
       tty->print_cr(" writes @Stable and needs a memory barrier");
     }
-#endif
   }
 
   for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) {
--- a/hotspot/src/share/vm/opto/parse2.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -136,8 +136,16 @@
       BoolTest::mask btest = BoolTest::lt;
       tst = _gvn.transform( new BoolNode(chk, btest) );
     }
+    RangeCheckNode* rc = new RangeCheckNode(control(), tst, PROB_MAX, COUNT_UNKNOWN);
+    _gvn.set_type(rc, rc->Value(&_gvn));
+    if (!tst->is_Con()) {
+      record_for_igvn(rc);
+    }
+    set_control(_gvn.transform(new IfTrueNode(rc)));
     // Branch to failure if out of bounds
-    { BuildCutout unless(this, tst, PROB_MAX);
+    {
+      PreserveJVMState pjvms(this);
+      set_control(_gvn.transform(new IfFalseNode(rc)));
       if (C->allow_range_check_smearing()) {
         // Do not use builtin_throw, since range checks are sometimes
         // made more stringent by an optimistic transformation.
@@ -940,13 +948,11 @@
 //-------------------------------repush_if_args--------------------------------
 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
 inline int Parse::repush_if_args() {
-#ifndef PRODUCT
   if (PrintOpto && WizardMode) {
     tty->print("defending against excessive implicit null exceptions on %s @%d in ",
                Bytecodes::name(iter().cur_bc()), iter().cur_bci());
     method()->print_name(); tty->cr();
   }
-#endif
   int bc_depth = - Bytecodes::depth(iter().cur_bc());
   assert(bc_depth == 1 || bc_depth == 2, "only two kinds of branches");
   DEBUG_ONLY(sync_jvms());   // argument(n) requires a synced jvms
@@ -967,10 +973,9 @@
   float prob = branch_prediction(cnt, btest, target_bci, c);
   if (prob == PROB_UNKNOWN) {
     // (An earlier version of do_ifnull omitted this trap for OSR methods.)
-#ifndef PRODUCT
-    if (PrintOpto && Verbose)
-      tty->print_cr("Never-taken edge stops compilation at bci %d",bci());
-#endif
+    if (PrintOpto && Verbose) {
+      tty->print_cr("Never-taken edge stops compilation at bci %d", bci());
+    }
     repush_if_args(); // to gather stats on loop
     // We need to mark this branch as taken so that if we recompile we will
     // see that it is possible. In the tiered system the interpreter doesn't
@@ -1049,10 +1054,9 @@
   float untaken_prob = 1.0 - prob;
 
   if (prob == PROB_UNKNOWN) {
-#ifndef PRODUCT
-    if (PrintOpto && Verbose)
-      tty->print_cr("Never-taken edge stops compilation at bci %d",bci());
-#endif
+    if (PrintOpto && Verbose) {
+      tty->print_cr("Never-taken edge stops compilation at bci %d", bci());
+    }
     repush_if_args(); // to gather stats on loop
     // We need to mark this branch as taken so that if we recompile we will
     // see that it is possible. In the tiered system the interpreter doesn't
--- a/hotspot/src/share/vm/opto/parse3.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/parse3.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -213,11 +213,9 @@
     // not need to mention the class index, since the class will
     // already have been loaded if we ever see a non-null value.)
     // uncommon_trap(iter().get_field_signature_index());
-#ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name(); tty->print_cr(" asserting nullness of field at bci: %d", bci());
     }
-#endif
     if (C->log() != NULL) {
       C->log()->elem("assert_null reason='field' klass='%d'",
                      C->log()->identify(field->type()));
--- a/hotspot/src/share/vm/opto/phaseX.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/phaseX.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -521,7 +521,8 @@
   Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
   // Create a new if below new_entry for the predicate to be cloned
   ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
-                                        Deoptimization::DeoptReason reason);
+                                        Deoptimization::DeoptReason reason,
+                                        int opcode);
 
   void remove_speculative_types();
   void check_no_speculative_types() {
--- a/hotspot/src/share/vm/opto/reg_split.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/reg_split.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -506,10 +506,9 @@
       // Initialize the split counts to zero
       splits.append(0);
 #endif
-#ifndef PRODUCT
-      if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
+      if (PrintOpto && WizardMode && lrgs(bidx)._was_spilled1) {
         tty->print_cr("Warning, 2nd spill of L%d",bidx);
-#endif
+      }
     }
   }
 
--- a/hotspot/src/share/vm/opto/split_if.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/split_if.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -390,13 +390,13 @@
 // Found an If getting its condition-code input from a Phi in the same block.
 // Split thru the Region.
 void PhaseIdealLoop::do_split_if( Node *iff ) {
-#ifndef PRODUCT
-  if( PrintOpto && VerifyLoopOptimizations )
+  if (PrintOpto && VerifyLoopOptimizations) {
     tty->print_cr("Split-if");
+  }
   if (TraceLoopOpts) {
     tty->print_cr("SplitIf");
   }
-#endif
+
   C->set_major_progress();
   Node *region = iff->in(0);
   Node *region_dom = idom(region);
--- a/hotspot/src/share/vm/opto/superword.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/superword.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -276,7 +276,9 @@
       // stop looking, we already have the max vector to map to.
       if (cur_max_vector < local_loop_unroll_factor) {
         is_slp = false;
-        NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"));
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
+        }
         break;
       }
 
@@ -389,11 +391,9 @@
 
   if (_do_vector_loop) {
     if (_packset.length() == 0) {
-#ifndef PRODUCT
       if (TraceSuperWord) {
         tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
       }
-#endif
       pack_parallel();
     }
   }
@@ -558,9 +558,11 @@
           assert(!same_velt_type(s, mem_ref), "sanity");
           memops.push(s);
         }
-        MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
+        best_align_to_mem_ref = find_align_to_ref(memops);
         if (best_align_to_mem_ref == NULL) {
-          NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");)
+          if (TraceSuperWord) {
+            tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");
+          }
           break;
         }
         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
@@ -582,12 +584,10 @@
   } // while (memops.size() != 0
   set_align_to_ref(best_align_to_mem_ref);
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter find_adjacent_refs");
     print_packset();
   }
-#endif
 }
 
 #ifndef PRODUCT
@@ -874,7 +874,7 @@
         _dg.make_edge(s1, slice_sink);
       }
     }
-#ifndef PRODUCT
+
     if (TraceSuperWord) {
       tty->print_cr("\nDependence graph for slice: %d", n->_idx);
       for (int q = 0; q < _nlist.length(); q++) {
@@ -882,11 +882,10 @@
       }
       tty->cr();
     }
-#endif
+
     _nlist.clear();
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
     for (int r = 0; r < _disjoint_ptrs.length(); r++) {
@@ -895,7 +894,7 @@
     }
     tty->cr();
   }
-#endif
+
 }
 
 //---------------------------mem_slice_preds---------------------------
@@ -912,7 +911,9 @@
       if (out->is_Load()) {
         if (in_bb(out)) {
           preds.push(out);
-          NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);)
+          if (TraceSuperWord && Verbose) {
+            tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);
+          }
         }
       } else {
         // FIXME
@@ -931,7 +932,9 @@
     }//for
     if (n == stop) break;
     preds.push(n);
-    NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
+    if (TraceSuperWord && Verbose) {
+      tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);
+    }
     prev = n;
     assert(n->is_Mem(), "unexpected node %s", n->Name());
     n = n->in(MemNode::Memory);
@@ -1123,12 +1126,10 @@
     }
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter extend_packlist");
     print_packset();
   }
-#endif
 }
 
 //------------------------------follow_use_defs---------------------------
@@ -1412,12 +1413,10 @@
     }
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord) {
     tty->print_cr("\nAfter combine_packs");
     print_packset();
   }
-#endif
 }
 
 //-----------------------------construct_my_pack_map--------------------------
@@ -2244,10 +2243,15 @@
     if (cl->has_passed_slp()) {
       uint slp_max_unroll_factor = cl->slp_max_unroll();
       if (slp_max_unroll_factor == max_vlen) {
-        NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte));
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
+        }
         // For atomic unrolled loops which are vector mapped, instigate more unrolling.
         cl->set_notpassed_slp();
-        C->set_major_progress();
+        // if vector resources are limited, do not allow additional unrolling
+        if (FLOATPRESSURE > 8) {
+          C->set_major_progress();
+        }
         cl->mark_do_unroll_only();
       }
     }
@@ -2650,10 +2654,10 @@
     }
     ct++;
   } while (again);
-#ifndef PRODUCT
-  if (TraceSuperWord && Verbose)
+
+  if (TraceSuperWord && Verbose) {
     tty->print_cr("compute_max_depth iterated: %d times", ct);
-#endif
+  }
 }
 
 //-------------------------compute_vector_element_type-----------------------
@@ -2664,10 +2668,9 @@
 // Normally the type of the add is integer, but for packed character
 // operations the type of the add needs to be char.
 void SuperWord::compute_vector_element_type() {
-#ifndef PRODUCT
-  if (TraceSuperWord && Verbose)
+  if (TraceSuperWord && Verbose) {
     tty->print_cr("\ncompute_velt_type:");
-#endif
+  }
 
   // Initial type
   for (int i = 0; i < _block.length(); i++) {
@@ -2758,7 +2761,9 @@
   offset     += iv_adjust*p.memory_size();
   int off_rem = offset % vw;
   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
-  NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);)
+  if (TraceSuperWord && Verbose) {
+    tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
+  }
   return off_mod;
 }
 
@@ -4046,11 +4051,9 @@
   }//for (int i...
 
   if (_ii_first == -1 || _ii_last == -1) {
-#ifndef PRODUCT
     if (TraceSuperWord && Verbose) {
       tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong");
     }
-#endif
     return -1; // something vent wrong
   }
   // collect nodes in the first and last generations
@@ -4083,11 +4086,9 @@
       }//for
 
       if (found == false) {
-#ifndef PRODUCT
         if (TraceSuperWord && Verbose) {
           tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx);
         }
-#endif
         _ii_order.clear();
         return -1;
       }
@@ -4153,11 +4154,10 @@
     return true;
   }
 
-#ifndef PRODUCT
   if (TraceSuperWord && Verbose) {
     tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx);
   }
-#endif
+
   return false;
 }
 
@@ -4224,11 +4224,9 @@
   for (int i = 0; i < _mem_slice_head.length(); i++) {
     Node* n = _mem_slice_head.at(i);
     if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) {
-#ifndef PRODUCT
       if (TraceSuperWord && Verbose) {
         tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx);
       }
-#endif
       continue;
     }
 
@@ -4275,11 +4273,10 @@
 
   restart(); // invalidate all basic structures, since we rebuilt the graph
 
-#ifndef PRODUCT
   if (TraceSuperWord && Verbose) {
     tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild");
   }
-#endif
+
   return true;
 }
 
--- a/hotspot/src/share/vm/opto/vectornode.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/opto/vectornode.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -188,7 +188,7 @@
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = VectorNode::opcode(opc, bt);
-    return vopc > 0 && Matcher::match_rule_supported(vopc) && (vopc != Op_CMoveD || vlen == 4);
+    return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
   }
   return false;
 }
--- a/hotspot/src/share/vm/prims/jni.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/prims/jni.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -3902,7 +3902,7 @@
 void TestBufferingOopClosure_test();
 void TestCodeCacheRemSet_test();
 void FreeRegionList_test();
-void test_memset_with_concurrent_readers();
+void test_memset_with_concurrent_readers() NOT_DEBUG_RETURN;
 void TestPredictions_test();
 void WorkerDataArray_test();
 #endif
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -457,7 +457,7 @@
   return flag_name;
 }
 
-#ifndef PRODUCT
+#ifdef ASSERT
 static bool lookup_special_flag(const char *flag_name, size_t skip_index) {
   for (size_t i = 0; special_jvm_flags[i].name != NULL; i++) {
     if ((i != skip_index) && (strcmp(special_jvm_flags[i].name, flag_name) == 0)) {
--- a/hotspot/src/share/vm/runtime/deoptimization.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/deoptimization.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -68,7 +68,8 @@
                                          int  number_of_frames,
                                          intptr_t* frame_sizes,
                                          address* frame_pcs,
-                                         BasicType return_type) {
+                                         BasicType return_type,
+                                         int exec_mode) {
   _size_of_deoptimized_frame = size_of_deoptimized_frame;
   _caller_adjustment         = caller_adjustment;
   _caller_actual_parameters  = caller_actual_parameters;
@@ -80,10 +81,11 @@
   _initial_info              = 0;
   // PD (x86 only)
   _counter_temp              = 0;
-  _unpack_kind               = 0;
+  _unpack_kind               = exec_mode;
   _sender_sp_temp            = 0;
 
   _total_frame_sizes         = size_of_frames();
+  assert(exec_mode >= 0 && exec_mode < Unpack_LIMIT, "Unexpected exec_mode");
 }
 
 
@@ -128,7 +130,7 @@
 // ResetNoHandleMark and HandleMark were removed from it. The actual reallocation
 // of previously eliminated objects occurs in realloc_objects, which is
 // called from the method fetch_unroll_info_helper below.
-JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread))
+JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info(JavaThread* thread, int exec_mode))
   // It is actually ok to allocate handles in a leaf method. It causes no safepoints,
   // but makes the entry a little slower. There is however a little dance we have to
   // do in debug mode to get around the NoHandleMark code in the JRT_LEAF macro
@@ -142,12 +144,12 @@
   }
   thread->inc_in_deopt_handler();
 
-  return fetch_unroll_info_helper(thread);
+  return fetch_unroll_info_helper(thread, exec_mode);
 JRT_END
 
 
 // This is factored, since it is both called from a JRT_LEAF (deoptimization) and a JRT_ENTRY (uncommon_trap)
-Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread) {
+Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread* thread, int exec_mode) {
 
   // Note: there is a safepoint safety issue here. No matter whether we enter
   // via vanilla deopt or uncommon trap we MUST NOT stop at a safepoint once
@@ -186,6 +188,19 @@
   assert(vf->is_compiled_frame(), "Wrong frame type");
   chunk->push(compiledVFrame::cast(vf));
 
+  ScopeDesc* trap_scope = chunk->at(0)->scope();
+  Handle exceptionObject;
+  if (trap_scope->rethrow_exception()) {
+    if (PrintDeoptimizationDetails) {
+      tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_scope->method()->method_holder()->name()->as_C_string(), trap_scope->method()->name()->as_C_string(), trap_scope->bci());
+    }
+    GrowableArray<ScopeValue*>* expressions = trap_scope->expressions();
+    guarantee(expressions != NULL && expressions->length() > 0, "must have exception to throw");
+    ScopeValue* topOfStack = expressions->top();
+    exceptionObject = StackValue::create_stack_value(&deoptee, &map, topOfStack)->get_obj();
+    assert(exceptionObject() != NULL, "exception oop can not be null");
+  }
+
   bool realloc_failures = false;
 
 #if defined(COMPILER2) || INCLUDE_JVMCI
@@ -474,13 +489,21 @@
   assert(CodeCache::find_blob_unsafe(frame_pcs[0]) != NULL, "bad pc");
 #endif // SHARK
 
+#ifdef INCLUDE_JVMCI
+  if (exceptionObject() != NULL) {
+    thread->set_exception_oop(exceptionObject());
+    exec_mode = Unpack_exception;
+  }
+#endif
+
   UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
                                       caller_adjustment * BytesPerWord,
                                       caller_was_method_handle ? 0 : callee_parameters,
                                       number_of_frames,
                                       frame_sizes,
                                       frame_pcs,
-                                      return_type);
+                                      return_type,
+                                      exec_mode);
   // On some platforms, we need a way to pass some platform dependent
   // information to the unpacking code so the skeletal frames come out
   // correct (initial fp value, unextended sp, ...)
@@ -1495,18 +1518,6 @@
 #endif
 
     Bytecodes::Code trap_bc     = trap_method->java_code_at(trap_bci);
-
-    if (trap_scope->rethrow_exception()) {
-      if (PrintDeoptimizationDetails) {
-        tty->print_cr("Exception to be rethrown in the interpreter for method %s::%s at bci %d", trap_method->method_holder()->name()->as_C_string(), trap_method->name()->as_C_string(), trap_bci);
-      }
-      GrowableArray<ScopeValue*>* expressions = trap_scope->expressions();
-      guarantee(expressions != NULL, "must have exception to throw");
-      ScopeValue* topOfStack = expressions->top();
-      Handle topOfStackObj = StackValue::create_stack_value(&fr, &reg_map, topOfStack)->get_obj();
-      THREAD->set_pending_exception(topOfStackObj(), NULL, 0);
-    }
-
     // Record this event in the histogram.
     gather_statistics(reason, action, trap_bc);
 
@@ -1985,7 +1996,7 @@
                            ignore_maybe_prior_recompile);
 }
 
-Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request) {
+Deoptimization::UnrollBlock* Deoptimization::uncommon_trap(JavaThread* thread, jint trap_request, jint exec_mode) {
   if (TraceDeoptimization) {
     tty->print("Uncommon trap ");
   }
@@ -1994,7 +2005,7 @@
     // This enters VM and may safepoint
     uncommon_trap_inner(thread, trap_request);
   }
-  return fetch_unroll_info_helper(thread);
+  return fetch_unroll_info_helper(thread, exec_mode);
 }
 
 // Local derived constants.
--- a/hotspot/src/share/vm/runtime/deoptimization.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/deoptimization.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -123,7 +123,8 @@
     Unpack_deopt                = 0, // normal deoptimization, use pc computed in unpack_vframe_on_stack
     Unpack_exception            = 1, // exception is pending
     Unpack_uncommon_trap        = 2, // redo last byte code (C2 only)
-    Unpack_reexecute            = 3  // reexecute bytecode (C1 only)
+    Unpack_reexecute            = 3, // reexecute bytecode (C1 only)
+    Unpack_LIMIT                = 4
   };
 
   // Checks all compiled methods. Invalid methods are deleted and
@@ -179,13 +180,13 @@
     intptr_t  _initial_info;              // Platform dependent data for the sender frame (was FP on x86)
     int       _caller_actual_parameters;  // The number of actual arguments at the
                                           // interpreted caller of the deoptimized frame
+    int       _unpack_kind;               // exec_mode that can be changed during fetch_unroll_info
 
     // The following fields are used as temps during the unpacking phase
     // (which is tight on registers, especially on x86). They really ought
     // to be PD variables but that involves moving this class into its own
     // file to use the pd include mechanism. Maybe in a later cleanup ...
     intptr_t  _counter_temp;              // SHOULD BE PD VARIABLE (x86 frame count temp)
-    intptr_t  _unpack_kind;               // SHOULD BE PD VARIABLE (x86 unpack kind)
     intptr_t  _sender_sp_temp;            // SHOULD BE PD VARIABLE (x86 sender_sp)
    public:
     // Constructor
@@ -195,7 +196,8 @@
                 int  number_of_frames,
                 intptr_t* frame_sizes,
                 address* frames_pcs,
-                BasicType return_type);
+                BasicType return_type,
+                int unpack_kind);
     ~UnrollBlock();
 
     // Returns where a register is located.
@@ -205,6 +207,7 @@
     intptr_t* frame_sizes()  const { return _frame_sizes; }
     int number_of_frames()  const { return _number_of_frames; }
     address*  frame_pcs()   const { return _frame_pcs ; }
+    int  unpack_kind()   const { return _unpack_kind; }
 
     // Returns the total size of frames
     int size_of_frames() const;
@@ -237,7 +240,7 @@
   // deoptimized frame.
   // @argument thread.     Thread where stub_frame resides.
   // @see OptoRuntime::deoptimization_fetch_unroll_info_C
-  static UnrollBlock* fetch_unroll_info(JavaThread* thread);
+  static UnrollBlock* fetch_unroll_info(JavaThread* thread, int exec_mode);
 
   //** Unpacks vframeArray onto execution stack
   // Called by assembly stub after execution has returned to
@@ -262,7 +265,7 @@
 
   //** Performs an uncommon trap for compiled code.
   // The top most compiler frame is converted into interpreter frames
-  static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
+  static UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode);
   // Helper routine that enters the VM and may block
   static void uncommon_trap_inner(JavaThread* thread, jint unloaded_class_index);
 
@@ -423,7 +426,7 @@
   static void load_class_by_index(const constantPoolHandle& constant_pool, int index, TRAPS);
   static void load_class_by_index(const constantPoolHandle& constant_pool, int index);
 
-  static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread);
+  static UnrollBlock* fetch_unroll_info_helper(JavaThread* thread, int exec_mode);
 
   static DeoptAction _unloaded_action; // == Action_reinterpret;
   static const char* _trap_reason_name[];
--- a/hotspot/src/share/vm/runtime/globals.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/globals.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -528,59 +528,57 @@
 // 4991491 do not "optimize out" the was_set false values: omitting them
 // tickles a Microsoft compiler bug causing flagTable to be malformed
 
-#define NAME(name) NOT_PRODUCT(&name) PRODUCT_ONLY(&CONST_##name)
+#define RUNTIME_PRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) },
+#define RUNTIME_PD_PRODUCT_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) },
+#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) },
+#define RUNTIME_MANAGEABLE_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) },
+#define RUNTIME_PRODUCT_RW_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) },
+#define RUNTIME_DEVELOP_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) },
+#define RUNTIME_PD_DEVELOP_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define RUNTIME_NOTPRODUCT_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) },
 
-#define RUNTIME_PRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT) },
-#define RUNTIME_PD_PRODUCT_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DIAGNOSTIC) },
-#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_EXPERIMENTAL) },
-#define RUNTIME_MANAGEABLE_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_MANAGEABLE) },
-#define RUNTIME_PRODUCT_RW_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_PRODUCT | Flag::KIND_READ_WRITE) },
-#define RUNTIME_DEVELOP_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP) },
-#define RUNTIME_PD_DEVELOP_FLAG_STRUCT(  type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define RUNTIME_NOTPRODUCT_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_NOT_PRODUCT) },
-
-#define JVMCI_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) },
-#define JVMCI_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define JVMCI_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) },
-#define JVMCI_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define JVMCI_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) },
-#define JVMCI_EXPERIMENTAL_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) },
-#define JVMCI_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) },
+#define JVMCI_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT) },
+#define JVMCI_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define JVMCI_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DIAGNOSTIC) },
+#define JVMCI_EXPERIMENTAL_FLAG_STRUCT(  type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_EXPERIMENTAL) },
+#define JVMCI_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP) },
+#define JVMCI_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define JVMCI_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_JVMCI | Flag::KIND_NOT_PRODUCT) },
 
 #ifdef _LP64
-#define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) },
+#define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_LP64_PRODUCT) },
 #else
 #define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) /* flag is constant */
 #endif // _LP64
 
-#define C1_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) },
-#define C1_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C1_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) },
-#define C1_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) },
-#define C1_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C1_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) },
+#define C1_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT) },
+#define C1_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C1_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DIAGNOSTIC) },
+#define C1_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP) },
+#define C1_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C1_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C1 | Flag::KIND_NOT_PRODUCT) },
 
-#define C2_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) },
-#define C2_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C2_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) },
-#define C2_EXPERIMENTAL_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) },
-#define C2_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) },
-#define C2_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define C2_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) },
+#define C2_PRODUCT_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT) },
+#define C2_PD_PRODUCT_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C2_DIAGNOSTIC_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DIAGNOSTIC) },
+#define C2_EXPERIMENTAL_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_EXPERIMENTAL) },
+#define C2_DEVELOP_FLAG_STRUCT(          type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP) },
+#define C2_PD_DEVELOP_FLAG_STRUCT(       type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define C2_NOTPRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_C2 | Flag::KIND_NOT_PRODUCT) },
 
-#define ARCH_PRODUCT_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) },
-#define ARCH_DIAGNOSTIC_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) },
-#define ARCH_EXPERIMENTAL_FLAG_STRUCT(   type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) },
-#define ARCH_DEVELOP_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) },
-#define ARCH_NOTPRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) },
+#define ARCH_PRODUCT_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_PRODUCT) },
+#define ARCH_DIAGNOSTIC_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DIAGNOSTIC) },
+#define ARCH_EXPERIMENTAL_FLAG_STRUCT(   type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_EXPERIMENTAL) },
+#define ARCH_DEVELOP_FLAG_STRUCT(        type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_DEVELOP) },
+#define ARCH_NOTPRODUCT_FLAG_STRUCT(     type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_ARCH | Flag::KIND_NOT_PRODUCT) },
 
-#define SHARK_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) },
-#define SHARK_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
-#define SHARK_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,      NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) },
-#define SHARK_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) },
-#define SHARK_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
-#define SHARK_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), NAME(name), NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) },
+#define SHARK_PRODUCT_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT) },
+#define SHARK_PD_PRODUCT_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_PRODUCT | Flag::KIND_PLATFORM_DEPENDENT) },
+#define SHARK_DIAGNOSTIC_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), &name,         NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DIAGNOSTIC) },
+#define SHARK_DEVELOP_FLAG_STRUCT(       type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP) },
+#define SHARK_PD_DEVELOP_FLAG_STRUCT(    type, name,        doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_DEVELOP | Flag::KIND_PLATFORM_DEPENDENT) },
+#define SHARK_NOTPRODUCT_FLAG_STRUCT(    type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) Flag::Flags(Flag::DEFAULT | Flag::KIND_SHARK | Flag::KIND_NOT_PRODUCT) },
 
 static Flag flagTable[] = {
  RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, \
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Nov 13 13:31:48 2015 +0100
@@ -4290,9 +4290,9 @@
 #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc)   extern "C" type name;
 #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc)   extern "C" type name;
 #ifdef PRODUCT
-#define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    extern "C" type CONST_##name; const type name = value;
-#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        extern "C" type CONST_##name; const type name = pd_##name;
-#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc)   extern "C" type CONST_##name;
+#define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    const type name = value;
+#define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        const type name = pd_##name;
+#define DECLARE_NOTPRODUCT_FLAG(type, name, value, doc)   const type name = value;
 #else
 #define DECLARE_DEVELOPER_FLAG(type, name, value, doc)    extern "C" type name;
 #define DECLARE_PD_DEVELOPER_FLAG(type, name, doc)        extern "C" type name;
@@ -4313,9 +4313,9 @@
 #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc)   type name = value;
 #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc)   type name = value;
 #ifdef PRODUCT
-#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)    type CONST_##name = value;
-#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)        type CONST_##name = pd_##name;
-#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc)   type CONST_##name = value;
+#define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)
+#define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)
+#define MATERIALIZE_NOTPRODUCT_FLAG(type, name, value, doc)
 #else
 #define MATERIALIZE_DEVELOPER_FLAG(type, name, value, doc)    type name = value;
 #define MATERIALIZE_PD_DEVELOPER_FLAG(type, name, doc)        type name = pd_##name;
--- a/hotspot/src/share/vm/runtime/sweeper.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/sweeper.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -297,7 +297,7 @@
 void NMethodSweeper::handle_safepoint_request() {
   if (SafepointSynchronize::is_synchronizing()) {
     if (PrintMethodFlushing && Verbose) {
-      tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nof_nmethods());
+      tty->print_cr("### Sweep at %d out of %d, yielding to safepoint", _seen, CodeCache::nmethod_count());
     }
     MutexUnlockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
 
@@ -401,7 +401,7 @@
   int flushed_c2_count     = 0;
 
   if (PrintMethodFlushing && Verbose) {
-    tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nof_nmethods());
+    tty->print_cr("### Sweep at %d out of %d", _seen, CodeCache::nmethod_count());
   }
 
   int swept_count = 0;
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -969,6 +969,7 @@
   nonstatic_field(Deoptimization::UnrollBlock, _caller_adjustment,                            int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _number_of_frames,                             int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _total_frame_sizes,                            int)                                   \
+  nonstatic_field(Deoptimization::UnrollBlock, _unpack_kind,                                  int)                                   \
   nonstatic_field(Deoptimization::UnrollBlock, _frame_sizes,                                  intptr_t*)                             \
   nonstatic_field(Deoptimization::UnrollBlock, _frame_pcs,                                    address*)                              \
   nonstatic_field(Deoptimization::UnrollBlock, _register_block,                               intptr_t*)                             \
--- a/hotspot/src/share/vm/shark/sharkRuntime.cpp	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/src/share/vm/shark/sharkRuntime.cpp	Fri Nov 13 13:31:48 2015 +0100
@@ -213,8 +213,9 @@
   // Initiate the trap
   thread->set_last_Java_frame();
   Deoptimization::UnrollBlock *urb =
-    Deoptimization::uncommon_trap(thread, trap_request);
+    Deoptimization::uncommon_trap(thread, trap_request, Deoptimization::Unpack_uncommon_trap);
   thread->reset_last_Java_frame();
+  assert(urb->unpack_kind() == Deoptimization::Unpack_uncommon_trap, "expected Unpack_uncommon_trap");
 
   // Pop our dummy frame and the frame being deoptimized
   thread->pop_zero_frame();
--- a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8072016
  * @summary Infinite deoptimization/recompilation cycles in case of arraycopy with tightly coupled allocation
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TestArrayCopyNoInitDeopt
@@ -42,6 +42,7 @@
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
 import java.lang.reflect.*;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestArrayCopyNoInitDeopt {
 
--- a/hotspot/test/compiler/floatingpoint/TestPow2.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/floatingpoint/TestPow2.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8063086
  * @summary X^2 special case for C2 yields different result than interpreter
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build TestPow2
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
@@ -36,6 +36,7 @@
 
 import java.lang.reflect.*;
 import sun.hotspot.WhiteBox;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestPow2 {
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/inlining/InlineAccessors.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+/**
+ * @test
+ * @bug 8140650
+ * @summary Method::is_accessor should cover getters and setters for all types
+ * @library /testlibrary
+ * @run main/othervm InlineAccessors
+ */
+import java.lang.invoke.*;
+import jdk.test.lib.*;
+import static jdk.test.lib.Asserts.*;
+
+public class InlineAccessors {
+    public static void main(String[] args) throws Exception {
+        // try some sanity checks first
+        doTest();
+
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                "-XX:+IgnoreUnrecognizedVMOptions", "-showversion",
+                "-server", "-XX:-TieredCompilation", "-Xbatch", "-Xcomp",
+                "-XX:+PrintCompilation", "-XX:+UnlockDiagnosticVMOptions", "-XX:+PrintInlining",
+                    "InlineAccessors$Launcher");
+
+        OutputAnalyzer analyzer = new OutputAnalyzer(pb.start());
+
+        analyzer.shouldHaveExitValue(0);
+
+        // The test is applicable only to C2 (present in Server VM).
+        if (analyzer.getStderr().contains("Server VM")) {
+            analyzer.shouldContain("InlineAccessors::setBool (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setByte (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setChar (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setShort (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setInt (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setFloat (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setLong (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setDouble (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setObject (6 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::setArray (6 bytes)   accessor");
+
+            analyzer.shouldContain("InlineAccessors::getBool (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getByte (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getChar (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getShort (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getInt (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getFloat (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getLong (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getDouble (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getObject (5 bytes)   accessor");
+            analyzer.shouldContain("InlineAccessors::getArray (5 bytes)   accessor");
+        }
+    }
+
+    boolean bool;
+    byte b;
+    char c;
+    short s;
+    int i;
+    float f;
+    long l;
+    double d;
+    Object o;
+    Object[] a;
+
+    public void setBool(boolean v)   { bool = v; }
+    public void setByte(byte v)      { b = v; }
+    public void setChar(char v)      { c = v; }
+    public void setShort(short v)    { s = v; }
+    public void setInt(int v)        { i = v; }
+    public void setFloat(float v)    { f = v; }
+    public void setLong(long v)      { l = v; }
+    public void setDouble(double v)  { d = v; }
+    public void setObject(Object v)  { o = v; }
+    public void setArray(Object[] v) { a = v; }
+
+    public boolean  getBool()        { return bool; }
+    public byte     getByte()        { return b; }
+    public char     getChar()        { return c; }
+    public short    getShort()       { return s; }
+    public int      getInt()         { return i; }
+    public float    getFloat()       { return f; }
+    public long     getLong()        { return l; }
+    public double   getDouble()      { return d; }
+    public Object   getObject()      { return o; }
+    public Object[] getArray()       { return a; }
+
+    static void doTest() {
+        InlineAccessors o = new InlineAccessors();
+        o.setBool(false);
+        o.setByte((byte)0);
+        o.setChar('a');
+        o.setShort((short)0);
+        o.setInt(0);
+        o.setFloat(0F);
+        o.setLong(0L);
+        o.setDouble(0D);
+        o.setObject(new Object());
+        o.setArray(new Object[1]);
+
+        o.getBool();
+        o.getByte();
+        o.getChar();
+        o.getShort();
+        o.getInt();
+        o.getFloat();
+        o.getLong();
+        o.getDouble();
+        o.getObject();
+        o.getArray();
+    }
+
+    static class Launcher {
+        public static void main(String[] args) throws Exception {
+            for (int c = 0; c < 20_000; c++) {
+              doTest();
+            }
+        }
+    }
+}
--- a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,10 +23,11 @@
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
 import java.util.Objects;
+import compiler.whitebox.CompilerWhiteBoxTest;
 /*
  * @test
  * @bug 8130832
- * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox /compiler/testlibrary /
  * @build IntrinsicAvailableTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build AddnTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build AddnTestL
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsiTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsiTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsiTestL
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsmskTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsmskTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsmskTestL
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsrTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BlsrTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build BlsrTestL
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/BmiIntrinsicBase.java	Fri Nov 13 13:31:48 2015 +0100
@@ -32,6 +32,7 @@
 import java.lang.reflect.Method;
 import java.util.concurrent.Callable;
 import java.util.function.Function;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class BmiIntrinsicBase extends CompilerWhiteBoxTest {
 
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build LZcntTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/LZcntTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build LZcntTestL
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestI.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build TZcntTestI
--- a/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/bmi/verifycode/TZcntTestL.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test
  * @bug 8031321
- * @library /testlibrary /test/lib /compiler/whitebox ..
+ * @library /testlibrary /test/lib /compiler/whitebox / ..
  * @modules java.base/sun.misc
  *          java.management
  * @build TZcntTestL
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build AddExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build AddExactLongTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build DecrementExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build DecrementExactLongTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build IncrementExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build IncrementExactLongTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Fri Nov 13 13:31:48 2015 +0100
@@ -27,6 +27,7 @@
 import java.io.FileOutputStream;
 import java.lang.reflect.Executable;
 import java.util.Properties;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public abstract class IntrinsicBase extends CompilerWhiteBoxTest {
     protected String javaVmName;
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,6 +23,7 @@
 
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class MathIntrinsic {
 
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build MultiplyExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build MultiplyExactLongTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build NegateExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build NegateExactLongTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build SubtractExactIntTest
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,8 +23,7 @@
 
 /*
  * @test
- * @library /testlibrary /test/lib /compiler/whitebox
- *          /compiler/testlibrary
+ * @library /testlibrary /test/lib /compiler/whitebox / /compiler/testlibrary
  * @modules java.base/sun.misc
  *          java.management
  * @build SubtractExactLongTest
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/unsafe/TestUnsafeUnalignedMismatchedAccesses.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8136473
+ * @summary Mismatched stores on same slice possible with Unsafe.Put*Unaligned methods
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestUnsafeUnalignedMismatchedAccesses
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+UnlockDiagnosticVMOptions -XX:-UseUnalignedAccesses TestUnsafeUnalignedMismatchedAccesses
+ *
+ */
+
+import java.lang.reflect.*;
+import sun.misc.Unsafe;
+
+public class TestUnsafeUnalignedMismatchedAccesses {
+
+    private static final Unsafe UNSAFE;
+
+    static {
+        try {
+            Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+            unsafeField.setAccessible(true);
+            UNSAFE = (Unsafe) unsafeField.get(null);
+        }
+        catch (Exception e) {
+            throw new AssertionError(e);
+        }
+    }
+
+    static void test1(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0);
+        array[0] = 0;
+    }
+
+    static void test2(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, 0);
+        array[0] = 0;
+    }
+
+    static void test3(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+2, 0);
+        array[0] = 0;
+    }
+
+    static void test4(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+3, 0);
+        array[0] = 0;
+    }
+
+    static void test5(byte[] array) {
+        array[0] = 0;
+        UNSAFE.putInt(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET, 0);
+        array[0] = 0;
+    }
+
+    // unaligned access and non escaping allocation
+    static void test6() {
+        byte[] array = new byte[10];
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1);
+        array[0] = 0;
+    }
+
+    // unaligned access and non escaping allocation
+    static int test7() {
+        byte[] array = new byte[10];
+        UNSAFE.putIntUnaligned(array, UNSAFE.ARRAY_BYTE_BASE_OFFSET+1, -1);
+        array[0] = 0;
+        array[2] = 0;
+        return array[0] + array[1] + array[2] + array[3] + array[4];
+    }
+
+    // unaligned access with vectorization
+    static void test8(int[] src1, int[] src2, int[] dst) {
+        for (int i = 0; i < dst.length-1; i++) {
+            int res = src1[i] + src2[i];
+            UNSAFE.putIntUnaligned(dst, UNSAFE.ARRAY_INT_BASE_OFFSET + i*4+1, res);
+        }
+    }
+
+    static public void main(String[] args) throws Exception {
+        byte[] byte_array = new byte[100];
+        int[] int_array = new int[100];
+        Object[] obj_array = new Object[100];
+        TestUnsafeUnalignedMismatchedAccesses test = new TestUnsafeUnalignedMismatchedAccesses();
+        for (int i = 0; i < 20000; i++) {
+            test1(byte_array);
+            test2(byte_array);
+            test3(byte_array);
+            test4(byte_array);
+            test5(byte_array);
+            test6();
+            test7();
+            test8(int_array, int_array, int_array);
+        }
+    }
+}
--- a/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/jvmci/compilerToVM/AllocateCompileIdTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -34,7 +34,6 @@
  * @run main/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI
  *      -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
  *      -XX:-BackgroundCompilation
-        -XX:+LogCompilation
  *      compiler.jvmci.compilerToVM.AllocateCompileIdTest
  */
 
@@ -45,22 +44,21 @@
 import java.lang.reflect.Executable;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.HashSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
-import compiler.jvmci.common.testcases.TestCase;
 import jdk.vm.ci.hotspot.CompilerToVMHelper;
 import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod;
 import jdk.test.lib.Asserts;
 import jdk.test.lib.Pair;
 import jdk.test.lib.Utils;
-import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
 
 public class AllocateCompileIdTest {
 
+    private static final int SOME_REPEAT_VALUE = 5;
     private final HashSet<Integer> ids = new HashSet<>();
 
     public static void main(String[] args) {
@@ -69,7 +67,6 @@
         createTestCasesIncorrectBci().forEach(test::runSanityIncorrectTest);
     }
 
-
     private static List<CompileCodeTestCase> createTestCasesCorrectBci() {
         List<CompileCodeTestCase> result = new ArrayList<>();
         try {
@@ -84,29 +81,29 @@
         return result;
     }
 
-
     private static List<Pair<CompileCodeTestCase, Class<? extends Throwable>>>
             createTestCasesIncorrectBci() {
         List<Pair<CompileCodeTestCase, Class<? extends Throwable>>> result
                 = new ArrayList<>();
-
         try {
             Class<?> aClass = DummyClass.class;
             Object receiver = new DummyClass();
             Method method = aClass.getMethod("dummyInstanceFunction");
             // greater than bytecode.length
-            int[] bcis = new int[] {30, 50, 200};
-            for (int bci : bcis) {
-                result.add(new Pair<>(
-                        new CompileCodeTestCase(receiver, method, bci),
-                        IllegalArgumentException.class));
-            }
-            bcis = new int[] {-4, -50, -200};
-            for (int bci : bcis) {
-                result.add(new Pair<>(
-                        new CompileCodeTestCase(receiver, method, bci),
-                        IllegalArgumentException.class));
-            }
+            byte[] bytecode = CompilerToVMHelper.getBytecode(CTVMUtilities
+                    .getResolvedMethod(method));
+            Stream.of(
+                    // greater than bytecode.length
+                    bytecode.length + 4,
+                    bytecode.length + 50,
+                    bytecode.length + 200,
+                    // negative cases
+                    -4, -50, -200)
+                    .map(bci -> new Pair<CompileCodeTestCase,
+                            Class<? extends Throwable>>(
+                            new CompileCodeTestCase(receiver, method, bci),
+                            IllegalArgumentException.class))
+                    .collect(Collectors.toList());
         } catch (NoSuchMethodException e) {
             throw new Error("TEST BUG : " + e.getMessage(), e);
         }
@@ -117,27 +114,20 @@
         System.out.println(testCase);
         Executable aMethod = testCase.executable;
         // to generate ciTypeFlow
-        System.out.println(testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes())));
+        testCase.invoke(Utils.getNullValues(aMethod.getParameterTypes()));
         int bci = testCase.bci;
         HotSpotResolvedJavaMethod method = CTVMUtilities
                 .getResolvedMethod(aMethod);
-        int wbCompileID = getWBCompileID(testCase);
-        int id = CompilerToVMHelper.allocateCompileId(method, bci);
-        Asserts.assertNE(id, 0, testCase + " : zero compile id");
-
-        if (wbCompileID > 0) {
+        for (int i = 0; i < SOME_REPEAT_VALUE; ++i) {
+            int wbCompileID = getWBCompileID(testCase);
+            int id = CompilerToVMHelper.allocateCompileId(method, bci);
+            Asserts.assertNE(id, 0, testCase + " : zero compile id");
             Asserts.assertGT(id, wbCompileID, testCase
                     + " : allocated 'compile id' not  greater than existed");
-            if (!ids.add(wbCompileID)) {
-                throw new AssertionError(String.format(
-                        "%s : vm compilation allocated existed id -- %d",
-                        testCase, id));
-            }
-        }
-        if (!ids.add(id)) {
-            throw new AssertionError(String.format(
-                    "%s : allocateCompileId returned existed id %d",
-                    testCase, id));
+            Asserts.assertTrue(ids.add(wbCompileID), testCase
+                    + " : vm compilation allocated existing id " + id);
+            Asserts.assertTrue(ids.add(id), testCase
+                    + " : allocateCompileId returned existing id " + id);
         }
     }
 
@@ -156,8 +146,8 @@
 
     private int getWBCompileID(CompileCodeTestCase testCase) {
         NMethod nm = testCase.deoptimizeAndCompile();
-        if (nm == null) {
-            throw new Error("[TEST BUG] cannot compile method " + testCase);
+        if (nm == null || nm.compile_id <= 0) {
+            throw new Error("TEST BUG : cannot compile method " + testCase);
         }
         return nm.compile_id;
     }
--- a/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/jvmci/compilerToVM/ReprofileTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -45,18 +45,15 @@
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Random;
+
+import compiler.whitebox.CompilerWhiteBoxTest;
 import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod;
 import jdk.vm.ci.hotspot.CompilerToVMHelper;
 import jdk.vm.ci.meta.ProfilingInfo;
 import jdk.test.lib.Asserts;
-import jdk.test.lib.Utils;
-import sun.hotspot.WhiteBox;
 
 public class ReprofileTest {
 
-    private static final WhiteBox WB = WhiteBox.getWhiteBox();
-
     public static void main(String[] args) {
         List<Method> testCases = createTestCases();
         testCases.forEach(ReprofileTest::runSanityTest);
@@ -67,10 +64,10 @@
         try {
 
             Class<?> aClass = DummyClass.class;
-            testCases.add(aClass.getMethod("withLoop"));
+            testCases.add(aClass.getMethod("dummyInstanceFunction"));
 
             aClass = DummyClass.class;
-            testCases.add(aClass.getDeclaredMethod("dummyFunction"));
+            testCases.add(aClass.getMethod("dummyFunction"));
         } catch (NoSuchMethodException e) {
             throw new Error("TEST BUG " + e.getMessage(), e);
         }
@@ -78,17 +75,17 @@
     }
 
     private static void runSanityTest(Method aMethod) {
+        System.out.println(aMethod);
         HotSpotResolvedJavaMethod method = CTVMUtilities
                 .getResolvedMethod(aMethod);
         ProfilingInfo startProfile = method.getProfilingInfo();
         Asserts.assertFalse(startProfile.isMature(), aMethod
-                + " : profiling info is mature in the begging");
+                + " : profiling info is mature in the beginning");
 
-        long compileThreshold = (Long) WB.getVMFlag("CompileThreshold");
         // make interpreter to profile this method
         try {
             Object obj = aMethod.getDeclaringClass().newInstance();
-            for (long i = 0; i < compileThreshold; i++) {
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; i++) {
                 aMethod.invoke(obj);
             }
         } catch (ReflectiveOperationException e) {
@@ -99,10 +96,10 @@
         Asserts.assertNE(startProfile.toString(), compProfile.toString(),
                 String.format("%s : profiling info wasn't changed after "
                                 + "%d invocations",
-                        aMethod, compileThreshold));
+                        aMethod, CompilerWhiteBoxTest.THRESHOLD));
         Asserts.assertTrue(compProfile.isMature(),
                 String.format("%s is not mature after %d invocations",
-                        aMethod, compileThreshold));
+                        aMethod, CompilerWhiteBoxTest.THRESHOLD));
 
         CompilerToVMHelper.reprofile(method);
         ProfilingInfo reprofiledProfile = method.getProfilingInfo();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/CodeInstallerTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.jvmci.errors;
+
+import java.lang.reflect.Method;
+
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.CodeCacheProvider;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.meta.MetaAccessProvider;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
+import jdk.vm.ci.runtime.JVMCI;
+import jdk.vm.ci.runtime.JVMCIBackend;
+
+import org.junit.Assert;
+
+public class CodeInstallerTest {
+
+    protected final Architecture arch;
+    protected final CodeCacheProvider codeCache;
+    protected final MetaAccessProvider metaAccess;
+    protected final HotSpotConstantReflectionProvider constantReflection;
+
+    protected final ResolvedJavaMethod dummyMethod;
+
+    public static void dummyMethod() {
+    }
+
+    protected CodeInstallerTest() {
+        JVMCIBackend backend = JVMCI.getRuntime().getHostJVMCIBackend();
+        metaAccess = backend.getMetaAccess();
+        codeCache = backend.getCodeCache();
+        constantReflection = (HotSpotConstantReflectionProvider) backend.getConstantReflection();
+        arch = codeCache.getTarget().arch;
+
+        Method method = null;
+        try {
+            method = CodeInstallerTest.class.getMethod("dummyMethod");
+        } catch (NoSuchMethodException e) {
+            Assert.fail();
+        }
+
+        dummyMethod = metaAccess.lookupJavaMethod(method);
+    }
+
+    protected void installCode(CompilationResult result) {
+        codeCache.addCode(dummyMethod, result, null, null);
+    }
+
+    protected CompilationResult createEmptyCompilationResult() {
+        CompilationResult ret = new CompilationResult();
+        ret.setTotalFrameSize(0);
+        return ret;
+    }
+
+    protected Register getRegister(PlatformKind kind, int index) {
+        Register[] allRegs = arch.getAvailableValueRegisters();
+        for (int i = 0; i < allRegs.length; i++) {
+            if (arch.canStoreValue(allRegs[i].getRegisterCategory(), kind)) {
+                if (index-- == 0) {
+                    return allRegs[i];
+                }
+            }
+        }
+        return null;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidCompilationResult.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidCompilationResult
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.ConstantReference;
+import static jdk.vm.ci.code.CompilationResult.DataPatch;
+import static jdk.vm.ci.code.CompilationResult.DataSectionReference;
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+import static jdk.vm.ci.code.CompilationResult.Reference;
+import static jdk.vm.ci.code.DataSection.Data;
+import static jdk.vm.ci.code.DataSection.DataBuilder;
+import static jdk.vm.ci.meta.Assumptions.Assumption;
+
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.hotspot.HotSpotConstant;
+import jdk.vm.ci.meta.ResolvedJavaType;
+import jdk.vm.ci.meta.VMConstant;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in the code installer.
+ */
+public class TestInvalidCompilationResult extends CodeInstallerTest {
+
+    private static class InvalidAssumption extends Assumption {
+    }
+
+    private static class InvalidVMConstant implements VMConstant {
+
+        public boolean isDefaultForKind() {
+            return false;
+        }
+
+        public String toValueString() {
+            return null;
+        }
+    }
+
+    private static class InvalidReference extends Reference {
+
+        @Override
+        public int hashCode() {
+            return 0;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            return false;
+        }
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidAssumption() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.setAssumptions(new Assumption[]{new InvalidAssumption()});
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidAlignment() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.getDataSection().insertData(new Data(7, 1, DataBuilder.zero(1)));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullDataPatchInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(null);
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullReferenceInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), null));
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidDataSectionReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        DataSectionReference ref = result.getDataSection().insertData(new Data(1, 1, DataBuilder.zero(1)));
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), ref));
+            buffer.put((byte) 0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidNarrowMethodInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        HotSpotConstant c = (HotSpotConstant) dummyMethod.getEncoding();
+        Data data = new Data(4, 4, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference((VMConstant) c.compress())));
+            buffer.putInt(0);
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullConstantInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference(null)));
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidConstantInDataSection() {
+        CompilationResult result = createEmptyCompilationResult();
+        Data data = new Data(1, 1, (buffer, patch) -> {
+            patch.accept(new DataPatch(buffer.position(), new ConstantReference(new InvalidVMConstant())));
+        });
+        result.getDataSection().insertData(data);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullReferenceInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, null);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullConstantInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new ConstantReference(null));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidConstantInCode() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new ConstantReference(new InvalidVMConstant()));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordDataPatch(0, new InvalidReference());
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testOutOfBoundsDataSectionReference() {
+        CompilationResult result = createEmptyCompilationResult();
+        DataSectionReference ref = new DataSectionReference();
+        ref.setOffset(0x1000);
+        result.recordDataPatch(0, ref);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidMark() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordMark(0, new Object());
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidMarkInt() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.recordMark(0, -1);
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullInfopoint() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(null);
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnknownInfopointReason() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.UNKNOWN));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInfopointMissingDebugInfo() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.METHOD_START));
+        installCode(result);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testSafepointMissingDebugInfo() {
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, null, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidDebugInfo.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidDebugInfo
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+
+import jdk.vm.ci.code.BytecodeFrame;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.DebugInfo;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.code.Location;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.code.StackSlot;
+import jdk.vm.ci.code.VirtualObject;
+import jdk.vm.ci.hotspot.HotSpotReferenceMap;
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.ResolvedJavaType;
+import jdk.vm.ci.meta.Value;
+import jdk.vm.ci.common.JVMCIError;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in debug info.
+ */
+public class TestInvalidDebugInfo extends CodeInstallerTest {
+
+    private static class UnknownJavaValue implements JavaValue {
+    }
+
+    private void test(JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) {
+        test(null, values, slotKinds, locals, stack, locks);
+    }
+
+    private void test(VirtualObject[] vobj, JavaValue[] values, JavaKind[] slotKinds, int locals, int stack, int locks) {
+        BytecodeFrame frame = new BytecodeFrame(null, dummyMethod, 0, false, false, values, slotKinds, locals, stack, locks);
+        DebugInfo info = new DebugInfo(frame, vobj);
+        info.setReferenceMap(new HotSpotReferenceMap(new Location[0], new Location[0], new int[0], 8));
+
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullValues() {
+        test(null, new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSlotKinds() {
+        test(new JavaValue[0], null, 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedScopeValuesLength() {
+        test(new JavaValue[]{JavaConstant.FALSE}, new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedScopeSlotKindsLength() {
+        test(new JavaValue[0], new JavaKind[]{JavaKind.Boolean}, 0, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullValue() {
+        test(new JavaValue[]{null}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSlotKind() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{null}, 1, 0, 0);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullMonitor() {
+        test(new JavaValue[]{null}, new JavaKind[0], 0, 0, 1);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testWrongMonitorType() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[0], 0, 0, 1);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedIllegalValue() {
+        test(new JavaValue[]{Value.ILLEGAL}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeInCPURegister() {
+        Register reg = getRegister(arch.getPlatformKind(JavaKind.Int), 0);
+        test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeInFloatRegister() {
+        Register reg = getRegister(arch.getPlatformKind(JavaKind.Float), 0);
+        test(new JavaValue[]{reg.asValue()}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedTypeOnStack() {
+        LIRKind kind = codeCache.getTarget().getLIRKind(JavaKind.Int);
+        StackSlot value = StackSlot.get(kind, 8, false);
+        test(new JavaValue[]{value}, new JavaKind[]{JavaKind.Illegal}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testWrongConstantType() {
+        test(new JavaValue[]{JavaConstant.INT_0}, new JavaKind[]{JavaKind.Object}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnsupportedConstantType() {
+        test(new JavaValue[]{JavaConstant.forShort((short) 0)}, new JavaKind[]{JavaKind.Short}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedNull() {
+        test(new JavaValue[]{JavaConstant.NULL_POINTER}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedObject() {
+        JavaValue wrapped = constantReflection.forObject(this);
+        test(new JavaValue[]{wrapped}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnknownJavaValue() {
+        test(new JavaValue[]{new UnknownJavaValue()}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testMissingIllegalAfterDouble() {
+        test(new JavaValue[]{JavaConstant.DOUBLE_0, JavaConstant.INT_0}, new JavaKind[]{JavaKind.Double, JavaKind.Int}, 2, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidVirtualObjectId() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o = VirtualObject.get(obj, 5);
+        o.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o}, new JavaValue[0], new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testDuplicateVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o1 = VirtualObject.get(obj, 0);
+        o1.setValues(new JavaValue[0], new JavaKind[0]);
+
+        VirtualObject o2 = VirtualObject.get(obj, 0);
+        o2.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o1, o2}, new JavaValue[0], new JavaKind[0], 0, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUnexpectedVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o = VirtualObject.get(obj, 0);
+        o.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o}, new JavaValue[]{o}, new JavaKind[]{JavaKind.Int}, 1, 0, 0);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testUndefinedVirtualObject() {
+        ResolvedJavaType obj = metaAccess.lookupJavaType(Object.class);
+        VirtualObject o0 = VirtualObject.get(obj, 0);
+        o0.setValues(new JavaValue[0], new JavaKind[0]);
+
+        VirtualObject o1 = VirtualObject.get(obj, 1);
+        o1.setValues(new JavaValue[0], new JavaKind[0]);
+
+        test(new VirtualObject[]{o0}, new JavaValue[]{o1}, new JavaKind[]{JavaKind.Object}, 1, 0, 0);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/jvmci/errors/TestInvalidOopMap.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "sparcv9") & os.arch != "aarch64"
+ * @compile CodeInstallerTest.java
+ * @run junit/othervm -da:jdk.vm.ci... -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI compiler.jvmci.errors.TestInvalidOopMap
+ */
+
+package compiler.jvmci.errors;
+
+import static jdk.vm.ci.code.CompilationResult.Infopoint;
+
+import jdk.vm.ci.code.BytecodePosition;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.DebugInfo;
+import jdk.vm.ci.code.InfopointReason;
+import jdk.vm.ci.code.Location;
+import jdk.vm.ci.code.ReferenceMap;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.hotspot.HotSpotReferenceMap;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.common.JVMCIError;
+
+import org.junit.Test;
+
+/**
+ * Tests for errors in oop maps.
+ */
+public class TestInvalidOopMap extends CodeInstallerTest {
+
+    private static class InvalidReferenceMap extends ReferenceMap {
+    }
+
+    private void test(ReferenceMap refMap) {
+        BytecodePosition pos = new BytecodePosition(null, dummyMethod, 0);
+        DebugInfo info = new DebugInfo(pos);
+        info.setReferenceMap(refMap);
+
+        CompilationResult result = createEmptyCompilationResult();
+        result.addInfopoint(new Infopoint(0, info, InfopointReason.SAFEPOINT));
+        installCode(result);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testMissingReferenceMap() {
+        test(null);
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidReferenceMap() {
+        test(new InvalidReferenceMap());
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullOops() {
+        test(new HotSpotReferenceMap(null, new Location[0], new int[0], 8));
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullBase() {
+        test(new HotSpotReferenceMap(new Location[0], null, new int[0], 8));
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullSize() {
+        test(new HotSpotReferenceMap(new Location[0], new Location[0], null, 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidLength() {
+        test(new HotSpotReferenceMap(new Location[1], new Location[2], new int[3], 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidShortOop() {
+        PlatformKind kind = arch.getPlatformKind(JavaKind.Short);
+        Register reg = getRegister(kind, 0);
+
+        Location[] oops = new Location[]{Location.register(reg)};
+        Location[] base = new Location[]{null};
+        int[] size = new int[]{kind.getSizeInBytes()};
+
+        test(new HotSpotReferenceMap(oops, base, size, 8));
+    }
+
+    @Test(expected = JVMCIError.class)
+    public void testInvalidNarrowDerivedOop() {
+        if (!HotSpotVMConfig.config().useCompressedOops) {
+            throw new JVMCIError("skipping test");
+        }
+
+        PlatformKind kind = arch.getPlatformKind(JavaKind.Int);
+        Register reg = getRegister(kind, 0);
+        Register baseReg = getRegister(arch.getPlatformKind(JavaKind.Object), 1);
+
+        Location[] oops = new Location[]{Location.register(reg)};
+        Location[] base = new Location[]{Location.register(baseReg)};
+        int[] size = new int[]{kind.getSizeInBytes()};
+
+        test(new HotSpotReferenceMap(oops, base, size, 8));
+    }
+}
--- a/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/jvmci/events/JvmciNotifyInstallEventTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -111,6 +111,8 @@
         Asserts.assertEQ(gotInstallNotification, 1,
                 "Got unexpected event count after 1st install attempt");
         // since "empty" compilation result is ok, a second attempt should be ok
+        compResult = new CompilationResult(METHOD_NAME); // create another instance with fresh state
+        compResult.setTotalFrameSize(0);
         codeCache.installCode(compRequest, compResult, /* installedCode = */ null, /* speculationLog = */ null,
                 /* isDefault = */ false);
         Asserts.assertEQ(gotInstallNotification, 2,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/superword/TestBestAlign.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2015 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8141624
+ * @summary Limit calculation of pre loop during super word optimization is wrong
+ * @run main/othervm TestBestAlign
+ * @author gunter.haug@sap.com
+ */
+
+public class TestBestAlign {
+
+    static final int initVal = -1;
+    static int intArray [];
+    static boolean boolArray[];
+    static int limit;
+    static public void clear() {
+        for (int i = 0; i < limit; i++) {
+            boolArray[1] = true;
+            intArray[i] = initVal;
+            boolArray[2] = true;
+        }
+    }
+
+    public static void main(String argv[]) throws Exception {
+        limit = 64;
+        boolArray = new boolean[8];
+        intArray = new int[limit + 4];
+        for (int i = 0; i < 10000000; ++i) {
+            if(i % 1000000 == 0)
+                System.out.println(i);
+            clear();
+        }
+    }
+}
--- a/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/rangechecks/TestExplicitRangeChecks.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8073480
  * @summary explicit range checks should be recognized by C2
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @build  TestExplicitRangeChecks
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  * @run main ClassFileInstaller jdk.test.lib.Platform
@@ -41,6 +41,7 @@
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
 import sun.misc.Unsafe;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestExplicitRangeChecks {
 
--- a/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/rangechecks/TestRangeCheckSmearing.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,7 +25,7 @@
  * @test
  * @bug 8066103
  * @summary C2's range check smearing allows out of bound array accesses
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TestRangeCheckSmearing
@@ -42,6 +42,7 @@
 import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Platform;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class TestRangeCheckSmearing {
     private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/rangechecks/TestUncommonTrapMerging.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8140574
+ * @summary Verify proper re-execution of checks after merging of uncommon traps
+ * @run main/othervm -Xcomp -XX:-TieredCompilation -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test1
+ * @run main/othervm -XX:CompileCommand=compileonly,TestUncommonTrapMerging::test* TestUncommonTrapMerging Test2
+ */
+public class TestUncommonTrapMerging {
+
+    public static void main(String[] args) throws Throwable {
+        if (args.length < 1) {
+            throw new RuntimeException("Not enough arguments!");
+        }
+        TestUncommonTrapMerging mytest = new TestUncommonTrapMerging();
+        String testcase = args[0];
+        if (testcase.equals("Test1")) {
+            try {
+                // '42' should hit the 'arg > 0' check
+                mytest.test(42);
+
+            } catch (OutOfMemoryError e) {
+                // expected
+            }
+        } else if (testcase.equals("Test2")) {
+            // Compile test2 with uncommon traps at path 1 and path 2
+            for (int i = 0; i < 100_000; i++) {
+                mytest.test2(-1, 0);
+            }
+
+            // Compile test3 which inlines test2 with uncommon traps at
+            // path 1 and path 2. Because test3 always passes 'value = 1',
+            // C2 will remove the 'value > 0' check and then merge the two
+            // uncommon traps.
+            for (int i = 0; i < 100_000; i++) {
+                mytest.test3(0);
+            }
+
+            // This should return through path 2
+            if (!mytest.test3(42)) {
+                throw new RuntimeException("test2 returned through wrong path!");
+            }
+        }
+    }
+
+    public void test(int arg) throws Throwable {
+        // The following two checks should not be merged if the
+        // uncommon trap of the dominating if has 'Reason_unloaded'
+        // because we need to re-execute both checks after deopt.
+        if (arg < 0) {
+            throw new RuntimeException("Should not reach here");
+        } else if (arg > 0) {
+            throw new OutOfMemoryError();
+        }
+        throw new RuntimeException("Should not reach here");
+    }
+
+    public boolean test2(int arg, int value) {
+        if (arg < 0) {
+            if (value > 0) {
+                // path 1
+                return false;
+            }
+        } else if (arg > 0) {
+            // path 2
+            return true;
+        }
+        // path 3
+        return false;
+    }
+
+    public boolean test3(int arg) {
+        int i;
+        for (i = 0; i < 1; ++i) { }
+        // i == 1
+        return test2(arg, i);
+    }
+}
--- a/hotspot/test/compiler/tiered/CompLevelsTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/CompLevelsTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -26,6 +26,9 @@
  *
  * @author igor.ignatyev@oracle.com
  */
+
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 public abstract class CompLevelsTest extends CompilerWhiteBoxTest {
     protected CompLevelsTest(TestCase testCase) {
         super(testCase);
--- a/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/ConstantGettersTransitionsTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -23,10 +23,11 @@
 
 import java.lang.reflect.Executable;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * @test ConstantGettersTransitionsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @build TransitionsTestExecutor ConstantGettersTransitionsTest
--- a/hotspot/test/compiler/tiered/LevelTransitionTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/LevelTransitionTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,10 +25,12 @@
 import java.lang.reflect.Method;
 import java.util.Objects;
 import java.util.concurrent.Callable;
+import compiler.whitebox.CompilerWhiteBoxTest;
+import compiler.whitebox.SimpleTestCase;
 
 /**
  * @test LevelTransitionTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/sun.misc
  *          java.management
  * @ignore 8067651
@@ -36,7 +38,7 @@
  * @run main ClassFileInstaller sun.hotspot.WhiteBox sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm/timeout=240 -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+WhiteBoxAPI -XX:+TieredCompilation
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   -XX:CompileCommand=compileonly,ExtendedTestCase$CompileMethodHolder::*
  *                   TransitionsTestExecutor LevelTransitionTest
  * @summary Test the correctness of compilation level transitions for different methods
--- a/hotspot/test/compiler/tiered/NonTieredLevelsTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/NonTieredLevelsTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -22,17 +22,18 @@
  */
 
 import java.util.function.IntPredicate;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * @test NonTieredLevelsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build NonTieredLevelsTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:-TieredCompilation
  *                   -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   NonTieredLevelsTest
  * @summary Verify that only one level can be used
  * @author igor.ignatyev@oracle.com
--- a/hotspot/test/compiler/tiered/TieredLevelsTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/TieredLevelsTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,16 +21,18 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /**
  * @test TieredLevelsTest
- * @library /testlibrary /test/lib /compiler/whitebox
+ * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.management
  * @build TieredLevelsTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:+TieredCompilation
  *                   -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   TieredLevelsTest
  * @summary Verify that all levels &lt; 'TieredStopAtLevel' can be used
  * @author igor.ignatyev@oracle.com
--- a/hotspot/test/compiler/tiered/TransitionsTestExecutor.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/tiered/TransitionsTestExecutor.java	Fri Nov 13 13:31:48 2015 +0100
@@ -29,6 +29,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /**
  * Executes given test in a separate VM with enabled Tiered Compilation for
--- a/hotspot/test/compiler/whitebox/ClearMethodStateTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/ClearMethodStateTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -22,16 +22,17 @@
  */
 
 import java.util.function.Function;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test ClearMethodStateTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build ClearMethodStateTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* ClearMethodStateTest
+ * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* ClearMethodStateTest
  * @summary testing of WB::clearMethodState()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/CompilerWhiteBoxTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -20,13 +20,11 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+package compiler.whitebox;
 
 import sun.hotspot.WhiteBox;
 import sun.hotspot.code.NMethod;
-
-import java.lang.reflect.Constructor;
 import java.lang.reflect.Executable;
-import java.lang.reflect.Method;
 import java.util.Objects;
 import java.util.concurrent.Callable;
 import java.util.function.Function;
@@ -38,19 +36,19 @@
  */
 public abstract class CompilerWhiteBoxTest {
     /** {@code CompLevel::CompLevel_none} -- Interpreter */
-    protected static final int COMP_LEVEL_NONE = 0;
+    public static final int COMP_LEVEL_NONE = 0;
     /** {@code CompLevel::CompLevel_any}, {@code CompLevel::CompLevel_all} */
-    protected static final int COMP_LEVEL_ANY = -1;
+    public static final int COMP_LEVEL_ANY = -1;
     /** {@code CompLevel::CompLevel_simple} -- C1 */
-    protected static final int COMP_LEVEL_SIMPLE = 1;
+    public static final int COMP_LEVEL_SIMPLE = 1;
     /** {@code CompLevel::CompLevel_limited_profile} -- C1, invocation &amp; backedge counters */
-    protected static final int COMP_LEVEL_LIMITED_PROFILE = 2;
+    public static final int COMP_LEVEL_LIMITED_PROFILE = 2;
     /** {@code CompLevel::CompLevel_full_profile} -- C1, invocation &amp; backedge counters + mdo */
-    protected static final int COMP_LEVEL_FULL_PROFILE = 3;
+    public static final int COMP_LEVEL_FULL_PROFILE = 3;
     /** {@code CompLevel::CompLevel_full_optimization} -- C2 or Shark */
-    protected static final int COMP_LEVEL_FULL_OPTIMIZATION = 4;
+    public static final int COMP_LEVEL_FULL_OPTIMIZATION = 4;
     /** Maximal value for CompLevel */
-    protected static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION;
+    public static final int COMP_LEVEL_MAX = COMP_LEVEL_FULL_OPTIMIZATION;
 
     /** Instance of WhiteBox */
     protected static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
@@ -70,7 +68,7 @@
     protected static final boolean IS_VERBOSE
             = System.getProperty("verbose") != null;
     /** invocation count to trigger compilation */
-    protected static final int THRESHOLD;
+    public static final int THRESHOLD;
     /** invocation count to trigger OSR compilation */
     protected static final long BACKEDGE_THRESHOLD;
     /** Value of {@code java.vm.info} (interpreted|mixed|comp mode) */
@@ -312,7 +310,7 @@
      *
      * @param executable Executable
      */
-    protected static final void waitBackgroundCompilation(Executable executable) {
+    public static final void waitBackgroundCompilation(Executable executable) {
         if (!BACKGROUND_COMPILATION) {
             return;
         }
@@ -441,7 +439,7 @@
      * @return {@code true} if the test should be skipped,
      *         {@code false} otherwise
      */
-    protected static boolean skipOnTieredCompilation(boolean value) {
+    public static boolean skipOnTieredCompilation(boolean value) {
         if (value == CompilerWhiteBoxTest.TIERED_COMPILATION) {
             System.err.println("Test isn't applicable w/ "
                     + (value ? "enabled" : "disabled")
@@ -452,256 +450,3 @@
     }
 }
 
-enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
-    /** constructor test case */
-    CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false),
-    /** method test case */
-    METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false),
-    /** static method test case */
-    STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false),
-    /** OSR constructor test case */
-    OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR,
-            Helper.OSR_CONSTRUCTOR_CALLABLE, true),
-    /** OSR method test case */
-    OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true),
-    /** OSR static method test case */
-    OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true);
-
-    private final Executable executable;
-    private final Callable<Integer> callable;
-    private final boolean isOsr;
-
-    private SimpleTestCase(Executable executable, Callable<Integer> callable,
-            boolean isOsr) {
-        this.executable = executable;
-        this.callable = callable;
-        this.isOsr = isOsr;
-    }
-
-    @Override
-    public Executable getExecutable() {
-        return executable;
-    }
-
-    @Override
-    public Callable<Integer> getCallable() {
-        return callable;
-    }
-
-    @Override
-    public boolean isOsr() {
-        return isOsr;
-    }
-
-    private static class Helper {
-
-        private static final Callable<Integer> CONSTRUCTOR_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return new Helper(1337).hashCode();
-            }
-        };
-
-        private static final Callable<Integer> METHOD_CALLABLE
-                = new Callable<Integer>() {
-            private final Helper helper = new Helper();
-
-            @Override
-            public Integer call() throws Exception {
-                return helper.method();
-            }
-        };
-
-        private static final Callable<Integer> STATIC_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return staticMethod();
-            }
-        };
-
-        private static final Callable<Integer> OSR_CONSTRUCTOR_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode();
-            }
-        };
-
-        private static final Callable<Integer> OSR_METHOD_CALLABLE
-                = new Callable<Integer>() {
-            private final Helper helper = new Helper();
-
-            @Override
-            public Integer call() throws Exception {
-                return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
-            }
-        };
-
-        private static final Callable<Integer> OSR_STATIC_CALLABLE
-                = new Callable<Integer>() {
-            @Override
-            public Integer call() throws Exception {
-                return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
-            }
-        };
-
-        private static final Constructor CONSTRUCTOR;
-        private static final Constructor OSR_CONSTRUCTOR;
-        private static final Method METHOD;
-        private static final Method STATIC;
-        private static final Method OSR_METHOD;
-        private static final Method OSR_STATIC;
-
-        static {
-            try {
-                CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper.<init>(int)", e);
-            }
-            try {
-                OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor(
-                        Object.class, long.class);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper.<init>(Object, long)", e);
-            }
-            METHOD = getMethod("method");
-            STATIC = getMethod("staticMethod");
-            OSR_METHOD = getMethod("osrMethod", long.class);
-            OSR_STATIC = getMethod("osrStaticMethod", long.class);
-        }
-
-        private static Method getMethod(String name, Class<?>... parameterTypes) {
-            try {
-                return Helper.class.getDeclaredMethod(name, parameterTypes);
-            } catch (NoSuchMethodException | SecurityException e) {
-                throw new RuntimeException(
-                        "exception on getting method Helper." + name, e);
-            }
-        }
-
-        private static int staticMethod() {
-            return 1138;
-        }
-
-        private int method() {
-            return 42;
-        }
-
-        /**
-         * Deoptimizes all non-osr versions of the given executable after
-         * compilation finished.
-         *
-         * @param e Executable
-         * @throws Exception
-         */
-        private static void waitAndDeoptimize(Executable e) {
-            CompilerWhiteBoxTest.waitBackgroundCompilation(e);
-            if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) {
-                throw new RuntimeException(e + " must not be in queue");
-            }
-            // Deoptimize non-osr versions of executable
-            WhiteBox.getWhiteBox().deoptimizeMethod(e, false);
-        }
-
-        /**
-         * Executes the method multiple times to make sure we have
-         * enough profiling information before triggering an OSR
-         * compilation. Otherwise the C2 compiler may add uncommon traps.
-         *
-         * @param m Method to be executed
-         * @return Number of times the method was executed
-         * @throws Exception
-         */
-        private static int warmup(Method m) throws Exception {
-            waitAndDeoptimize(m);
-            Helper helper = new Helper();
-            int result = 0;
-            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
-                result += (int)m.invoke(helper, 1);
-            }
-            // Wait to make sure OSR compilation is not blocked by
-            // non-OSR compilation in the compile queue
-            CompilerWhiteBoxTest.waitBackgroundCompilation(m);
-            return result;
-        }
-
-        /**
-         * Executes the constructor multiple times to make sure we
-         * have enough profiling information before triggering an OSR
-         * compilation. Otherwise the C2 compiler may add uncommon traps.
-         *
-         * @param c Constructor to be executed
-         * @return Number of times the constructor was executed
-         * @throws Exception
-         */
-        private static int warmup(Constructor c) throws Exception {
-            waitAndDeoptimize(c);
-            int result = 0;
-            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
-                result += c.newInstance(null, 1).hashCode();
-            }
-            // Wait to make sure OSR compilation is not blocked by
-            // non-OSR compilation in the compile queue
-            CompilerWhiteBoxTest.waitBackgroundCompilation(c);
-            return result;
-        }
-
-        private static int osrStaticMethod(long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_STATIC);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += staticMethod();
-            }
-            return result;
-        }
-
-        private int osrMethod(long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_METHOD);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += method();
-            }
-            return result;
-        }
-
-        private final int x;
-
-        // for method and OSR method test case
-        public Helper() {
-            x = 0;
-        }
-
-        // for OSR constructor test case
-        private Helper(Object o, long limit) throws Exception {
-            int result = 0;
-            if (limit != 1) {
-                result = warmup(OSR_CONSTRUCTOR);
-            }
-            // Trigger osr compilation
-            for (long i = 0; i < limit; ++i) {
-                result += method();
-            }
-            x = result;
-        }
-
-        // for constructor test case
-        private Helper(int x) {
-            this.x = x;
-        }
-
-        @Override
-        public int hashCode() {
-            return x;
-        }
-    }
-}
--- a/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/DeoptimizeAllTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeAllTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeAllTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeAllTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeAllTest
  * @summary testing of WB::deoptimizeAll()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/DeoptimizeFramesTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,10 +21,12 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeFramesTest
  * @bug 8028595
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeFramesTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
--- a/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/DeoptimizeMethodTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test DeoptimizeMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* DeoptimizeMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* DeoptimizeMethodTest
  * @summary testing of WB::deoptimizeMethod()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/DeoptimizeMultipleOSRTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,11 +24,12 @@
 import sun.hotspot.WhiteBox;
 import java.lang.reflect.Executable;
 import java.lang.reflect.Method;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test DeoptimizeMultipleOSRTest
  * @bug 8061817
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build DeoptimizeMultipleOSRTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
--- a/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/EnqueueMethodForCompilationTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test EnqueueMethodForCompilationTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build EnqueueMethodForCompilationTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* EnqueueMethodForCompilationTest
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* EnqueueMethodForCompilationTest
  * @summary testing of WB::enqueueMethodForCompilation()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/ForceNMethodSweepTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -30,18 +30,19 @@
 
 import jdk.test.lib.Asserts;
 import jdk.test.lib.InfiniteLoop;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test
  * @bug 8059624 8064669
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build ForceNMethodSweepTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:-TieredCompilation -XX:+WhiteBoxAPI
- *                   -XX:CompileCommand=compileonly,SimpleTestCase$Helper::*
+ *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::*
  *                   -XX:-BackgroundCompilation ForceNMethodSweepTest
  * @summary testing of WB::forceNMethodSweep
  */
--- a/hotspot/test/compiler/whitebox/GetNMethodTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/GetNMethodTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -25,16 +25,17 @@
 import sun.hotspot.code.BlobType;
 import sun.hotspot.code.NMethod;
 import jdk.test.lib.Asserts;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 /*
  * @test GetNMethodTest
  * @bug 8038240
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build GetNMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* GetNMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* GetNMethodTest
  * @summary testing of WB::getNMethod()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/IsMethodCompilableTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,7 +24,7 @@
 /*
  * @test IsMethodCompilableTest
  * @bug 8007270 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.base/sun.misc
  *          java.management
  * @build jdk.test.lib.* sun.hotspot.WhiteBox
@@ -32,12 +32,13 @@
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main ClassFileInstaller jdk.test.lib.Platform
- * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* IsMethodCompilableTest
+ * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -Xmixed -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:PerMethodRecompilationCutoff=3 -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* IsMethodCompilableTest
  * @summary testing of WB::isMethodCompilable()
  * @author igor.ignatyev@oracle.com
  */
 
 import jdk.test.lib.Platform;
+import compiler.whitebox.CompilerWhiteBoxTest;
 
 public class IsMethodCompilableTest extends CompilerWhiteBoxTest {
     /**
--- a/hotspot/test/compiler/whitebox/LockCompilationTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/LockCompilationTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -24,12 +24,12 @@
 /*
  * @test LockCompilationTest
  * @bug 8059624
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build LockCompilationTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* LockCompilationTest
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -Xmixed -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* LockCompilationTest
  * @summary testing of WB::lock/unlockCompilation()
  */
 
@@ -37,7 +37,7 @@
 import java.io.PrintWriter;
 import java.util.concurrent.BrokenBarrierException;
 import java.util.concurrent.CyclicBarrier;
-
+import compiler.whitebox.CompilerWhiteBoxTest;
 import jdk.test.lib.Asserts;
 
 public class LockCompilationTest extends CompilerWhiteBoxTest {
--- a/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test MakeMethodNotCompilableTest
  * @bug 8012322 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build MakeMethodNotCompilableTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* MakeMethodNotCompilableTest
+ * @run main/othervm/timeout=2400 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* MakeMethodNotCompilableTest
  * @summary testing of WB::makeMethodNotCompilable()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/SetDontInlineMethodTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test SetDontInlineMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build SetDontInlineMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetDontInlineMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetDontInlineMethodTest
  * @summary testing of WB::testSetDontInlineMethod()
  * @author igor.ignatyev@oracle.com
  */
--- a/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/compiler/whitebox/SetForceInlineMethodTest.java	Fri Nov 13 13:31:48 2015 +0100
@@ -21,15 +21,17 @@
  * questions.
  */
 
+import compiler.whitebox.CompilerWhiteBoxTest;
+
 /*
  * @test SetForceInlineMethodTest
  * @bug 8006683 8007288 8022832
- * @library /testlibrary /test/lib
+ * @library /testlibrary /test/lib /
  * @modules java.management
  * @build SetForceInlineMethodTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,SimpleTestCase$Helper::* SetForceInlineMethodTest
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCase$Helper::* SetForceInlineMethodTest
  * @summary testing of WB::testSetForceInlineMethod()
  * @author igor.ignatyev@oracle.com
  */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/whitebox/SimpleTestCase.java	Fri Nov 13 13:31:48 2015 +0100
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.whitebox;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Executable;
+import java.lang.reflect.Method;
+import java.util.concurrent.Callable;
+import sun.hotspot.WhiteBox;
+
+public enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
+    /** constructor test case */
+    CONSTRUCTOR_TEST(Helper.CONSTRUCTOR, Helper.CONSTRUCTOR_CALLABLE, false),
+    /** method test case */
+    METHOD_TEST(Helper.METHOD, Helper.METHOD_CALLABLE, false),
+    /** static method test case */
+    STATIC_TEST(Helper.STATIC, Helper.STATIC_CALLABLE, false),
+    /** OSR constructor test case */
+    OSR_CONSTRUCTOR_TEST(Helper.OSR_CONSTRUCTOR,
+            Helper.OSR_CONSTRUCTOR_CALLABLE, true),
+    /** OSR method test case */
+    OSR_METHOD_TEST(Helper.OSR_METHOD, Helper.OSR_METHOD_CALLABLE, true),
+    /** OSR static method test case */
+    OSR_STATIC_TEST(Helper.OSR_STATIC, Helper.OSR_STATIC_CALLABLE, true);
+
+    private final Executable executable;
+    private final Callable<Integer> callable;
+    private final boolean isOsr;
+
+    private SimpleTestCase(Executable executable, Callable<Integer> callable,
+            boolean isOsr) {
+        this.executable = executable;
+        this.callable = callable;
+        this.isOsr = isOsr;
+    }
+
+    @Override
+    public Executable getExecutable() {
+        return executable;
+    }
+
+    @Override
+    public Callable<Integer> getCallable() {
+        return callable;
+    }
+
+    @Override
+    public boolean isOsr() {
+        return isOsr;
+    }
+
+    private static class Helper {
+
+        private static final Callable<Integer> CONSTRUCTOR_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return new Helper(1337).hashCode();
+            }
+        };
+
+        private static final Callable<Integer> METHOD_CALLABLE
+                = new Callable<Integer>() {
+            private final Helper helper = new Helper();
+
+            @Override
+            public Integer call() throws Exception {
+                return helper.method();
+            }
+        };
+
+        private static final Callable<Integer> STATIC_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return staticMethod();
+            }
+        };
+
+        private static final Callable<Integer> OSR_CONSTRUCTOR_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return new Helper(null, CompilerWhiteBoxTest.BACKEDGE_THRESHOLD).hashCode();
+            }
+        };
+
+        private static final Callable<Integer> OSR_METHOD_CALLABLE
+                = new Callable<Integer>() {
+            private final Helper helper = new Helper();
+
+            @Override
+            public Integer call() throws Exception {
+                return helper.osrMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
+            }
+        };
+
+        private static final Callable<Integer> OSR_STATIC_CALLABLE
+                = new Callable<Integer>() {
+            @Override
+            public Integer call() throws Exception {
+                return osrStaticMethod(CompilerWhiteBoxTest.BACKEDGE_THRESHOLD);
+            }
+        };
+
+        private static final Constructor CONSTRUCTOR;
+        private static final Constructor OSR_CONSTRUCTOR;
+        private static final Method METHOD;
+        private static final Method STATIC;
+        private static final Method OSR_METHOD;
+        private static final Method OSR_STATIC;
+
+        static {
+            try {
+                CONSTRUCTOR = Helper.class.getDeclaredConstructor(int.class);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper.<init>(int)", e);
+            }
+            try {
+                OSR_CONSTRUCTOR = Helper.class.getDeclaredConstructor(
+                        Object.class, long.class);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper.<init>(Object, long)", e);
+            }
+            METHOD = getMethod("method");
+            STATIC = getMethod("staticMethod");
+            OSR_METHOD = getMethod("osrMethod", long.class);
+            OSR_STATIC = getMethod("osrStaticMethod", long.class);
+        }
+
+        private static Method getMethod(String name, Class<?>... parameterTypes) {
+            try {
+                return Helper.class.getDeclaredMethod(name, parameterTypes);
+            } catch (NoSuchMethodException | SecurityException e) {
+                throw new RuntimeException(
+                        "exception on getting method Helper." + name, e);
+            }
+        }
+
+        private static int staticMethod() {
+            return 1138;
+        }
+
+        private int method() {
+            return 42;
+        }
+
+        /**
+         * Deoptimizes all non-osr versions of the given executable after
+         * compilation finished.
+         *
+         * @param e Executable
+         * @throws Exception
+         */
+        private static void waitAndDeoptimize(Executable e) {
+            CompilerWhiteBoxTest.waitBackgroundCompilation(e);
+            if (WhiteBox.getWhiteBox().isMethodQueuedForCompilation(e)) {
+                throw new RuntimeException(e + " must not be in queue");
+            }
+            // Deoptimize non-osr versions of executable
+            WhiteBox.getWhiteBox().deoptimizeMethod(e, false);
+        }
+
+        /**
+         * Executes the method multiple times to make sure we have
+         * enough profiling information before triggering an OSR
+         * compilation. Otherwise the C2 compiler may add uncommon traps.
+         *
+         * @param m Method to be executed
+         * @return Number of times the method was executed
+         * @throws Exception
+         */
+        private static int warmup(Method m) throws Exception {
+            waitAndDeoptimize(m);
+            Helper helper = new Helper();
+            int result = 0;
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
+                result += (int)m.invoke(helper, 1);
+            }
+            // Wait to make sure OSR compilation is not blocked by
+            // non-OSR compilation in the compile queue
+            CompilerWhiteBoxTest.waitBackgroundCompilation(m);
+            return result;
+        }
+
+        /**
+         * Executes the constructor multiple times to make sure we
+         * have enough profiling information before triggering an OSR
+         * compilation. Otherwise the C2 compiler may add uncommon traps.
+         *
+         * @param c Constructor to be executed
+         * @return Number of times the constructor was executed
+         * @throws Exception
+         */
+        private static int warmup(Constructor c) throws Exception {
+            waitAndDeoptimize(c);
+            int result = 0;
+            for (long i = 0; i < CompilerWhiteBoxTest.THRESHOLD; ++i) {
+                result += c.newInstance(null, 1).hashCode();
+            }
+            // Wait to make sure OSR compilation is not blocked by
+            // non-OSR compilation in the compile queue
+            CompilerWhiteBoxTest.waitBackgroundCompilation(c);
+            return result;
+        }
+
+        private static int osrStaticMethod(long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_STATIC);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += staticMethod();
+            }
+            return result;
+        }
+
+        private int osrMethod(long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_METHOD);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += method();
+            }
+            return result;
+        }
+
+        private final int x;
+
+        // for method and OSR method test case
+        public Helper() {
+            x = 0;
+        }
+
+        // for OSR constructor test case
+        private Helper(Object o, long limit) throws Exception {
+            int result = 0;
+            if (limit != 1) {
+                result = warmup(OSR_CONSTRUCTOR);
+            }
+            // Trigger osr compilation
+            for (long i = 0; i < limit; ++i) {
+                result += method();
+            }
+            x = result;
+        }
+
+        // for constructor test case
+        private Helper(int x) {
+            this.x = x;
+        }
+
+        @Override
+        public int hashCode() {
+            return x;
+        }
+    }
+}
--- a/hotspot/test/testlibrary/jdk/test/lib/Utils.java	Wed Nov 11 23:47:41 2015 +0000
+++ b/hotspot/test/testlibrary/jdk/test/lib/Utils.java	Fri Nov 13 13:31:48 2015 +0100
@@ -44,6 +44,7 @@
 import java.util.Map;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Objects;
 import java.util.Random;
 import java.util.function.BooleanSupplier;
 import java.util.concurrent.TimeUnit;
@@ -82,6 +83,16 @@
      */
     public static final String TEST_SRC = System.getProperty("test.src", ".").trim();
 
+    /*
+     * Returns the value of 'test.jdk' system property
+     */
+    public static final String TEST_JDK = System.getProperty("test.jdk");
+
+    /**
+     * Returns the value of 'test.classes' system property
+     */
+    public static final String TEST_CLASSES = System.getProperty("test.classes", ".");
+
     private static Unsafe unsafe = null;
 
     /**
@@ -616,5 +627,18 @@
         NULL_VALUES.put(float.class, 0.0f);
         NULL_VALUES.put(double.class, 0.0d);
     }
+
+    /**
+     * Returns mandatory property value
+     * @param propName is a name of property to request
+     * @return a String with requested property value
+     */
+    public static String getMandatoryProperty(String propName) {
+        Objects.requireNonNull(propName, "Requested null property");
+        String prop = System.getProperty(propName);
+        Objects.requireNonNull(prop,
+                String.format("A mandatory property '%s' isn't set", propName));
+        return prop;
+    }
 }