Merge jdk8-b22
authorduke
Wed, 05 Jul 2017 18:00:12 +0200
changeset 11475 e8f03541af27
parent 11474 c02378b94bcb (diff)
parent 11389 151eda010ec7 (current diff)
child 11476 baafd82b3670
child 11477 c62b7a0288a4
child 11478 853e2553be68
child 11479 3175942e4e72
child 11494 0720b6b09943
child 11497 e80123e27048
child 11504 1dd82176150b
child 11531 b1f9f8d4806b
child 11539 818d340233a9
child 11542 751387b26353
child 11544 939aca1c5dac
child 11546 11fa98675c2a
child 11548 7ac48efacb1f
child 11552 f03033e7f71f
child 11619 6583f39d8dff
Merge
--- a/.hgtags-top-repo	Wed Jul 05 17:59:32 2017 +0200
+++ b/.hgtags-top-repo	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 7010bd24cdd07bc7daef80702f39124854dec36c jdk8-b18
 237bc29afbfc6f56a4fe4a6008e2befb59c44bac jdk8-b19
 5a5eaf6374bcbe23530899579fed17a05b7705f3 jdk8-b20
+cc771d92284f71765eca14d6d08703c4af254c04 jdk8-b21
--- a/corba/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/corba/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 312cf15d16577ef198b033d2a4cc0a52369b7343 jdk8-b18
 e1366c5d84ef984095a332bcee70b3938232d07d jdk8-b19
 51d8b6cb18c0978ecfa4f33e1537d35ee01b69fa jdk8-b20
+f157fc2a71a38ce44007a6f18d5b011824dce705 jdk8-b21
--- a/hotspot/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -209,3 +209,5 @@
 4bcf61041217f8677dcec18e90e9196acc945bba hs23-b09
 9232e0ecbc2cec54dcc8f93004fb00c214446460 jdk8-b19
 fe2c8764998112b7fefcd7d41599714813ae4327 jdk8-b20
+9952d1c439d64c5fd4ad1236a63a62bd5a49d4c3 jdk8-b21
+513351373923f74a7c91755748b95c9771e59f96 hs23-b10
--- a/hotspot/make/bsd/makefiles/adlc.make	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/make/bsd/makefiles/adlc.make	Wed Jul 05 18:00:12 2017 +0200
@@ -39,9 +39,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
--- a/hotspot/make/hotspot_version	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/make/hotspot_version	Wed Jul 05 18:00:12 2017 +0200
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=09
+HS_BUILD_NUMBER=10
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/hotspot/make/linux/makefiles/adlc.make	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/make/linux/makefiles/adlc.make	Wed Jul 05 18:00:12 2017 +0200
@@ -39,9 +39,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
--- a/hotspot/make/solaris/makefiles/adlc.make	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/make/solaris/makefiles/adlc.make	Wed Jul 05 18:00:12 2017 +0200
@@ -40,9 +40,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
--- a/hotspot/make/windows/makefiles/adlc.make	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/make/windows/makefiles/adlc.make	Wed Jul 05 18:00:12 2017 +0200
@@ -53,6 +53,17 @@
   /I "$(WorkSpace)\src\os\windows\vm" \
   /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm"
 
+!if "$(Platform_arch_model)" == "$(Platform_arch)"
+SOURCES_AD=\
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!else
+SOURCES_AD=\
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \
+  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!endif
+
 # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR
 # and ProjectCreatorIDEOptions in projectcreator.make. 
 GENERATED_NAMES=\
@@ -105,7 +116,6 @@
 	$(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad
 	mv $(GENERATED_NAMES) $(AdlcOutDir)/
 
-$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+$(Platform_arch_model).ad: $(SOURCES_AD)
 	rm -f $(Platform_arch_model).ad
-	cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad  \
-	    $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad
+	cat $(SOURCES_AD) >$(Platform_arch_model).ad
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -3036,10 +3036,8 @@
                                                    Label* L_failure,
                                                    Label* L_slow_path,
                                         RegisterOrConstant super_check_offset) {
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
-  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                    Klass::super_check_offset_offset_in_bytes());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
 
   bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
   bool need_slow_path = (must_load_sco ||
@@ -3159,10 +3157,8 @@
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
   // a couple of useful fields in sub_klass:
-  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_supers_offset_in_bytes());
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 
   // Do a linear scan of the secondary super-klass chain.
   // This code is rarely used, so simplicity is a virtue here.
@@ -3336,7 +3332,7 @@
   cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
 
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
   xor3(mark_reg, temp_reg, temp_reg);
   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
@@ -3413,7 +3409,7 @@
   // FIXME: due to a lack of registers we currently blow away the age
   // bits in this situation. Should attempt to preserve them.
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
   casn(mark_addr.base(), mark_reg, temp_reg);
   // If the biasing toward our thread failed, this means that
@@ -3443,7 +3439,7 @@
   // FIXME: due to a lack of registers we currently blow away the age
   // bits in this situation. Should attempt to preserve them.
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   casn(mark_addr.base(), mark_reg, temp_reg);
   // Fall through to the normal CAS-based lock, because no matter what
   // the result of the above CAS, some thread must have succeeded in
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -302,7 +302,7 @@
     assert(_obj != noreg, "must be a valid register");
     assert(_oop_index >= 0, "must have oop index");
     __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3);
-    __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3);
+    __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3);
     __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch);
 
     // load_klass patches may execute the patched code before it's
@@ -471,7 +471,7 @@
 
   __ load_klass(src_reg, tmp_reg);
 
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
   __ ld(ref_type_adr, tmp_reg);
 
   // _reference_type field is of type ReferenceType (enum)
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2202,8 +2202,7 @@
           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
             __ load_klass(dst, tmp);
           }
-          int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-            Klass::layout_helper_offset_in_bytes();
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
 
           __ lduw(tmp, lh_offset, tmp2);
 
@@ -2238,12 +2237,10 @@
         __ mov(length, len);
         __ load_klass(dst, tmp);
 
-        int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                         objArrayKlass::element_klass_offset_in_bytes());
+        int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
         __ ld_ptr(tmp, ek_offset, super_k);
 
-        int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                          Klass::super_check_offset_offset_in_bytes());
+        int sco_offset = in_bytes(Klass::super_check_offset_offset());
         __ lduw(super_k, sco_offset, chk_off);
 
         __ call_VM_leaf(tmp, copyfunc_addr);
@@ -2455,8 +2452,8 @@
          op->obj()->as_register()   == O0 &&
          op->klass()->as_register() == G5, "must be");
   if (op->init_check()) {
-    __ ld(op->klass()->as_register(),
-          instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc),
+    __ ldub(op->klass()->as_register(),
+          in_bytes(instanceKlass::init_state_offset()),
           op->tmp1()->as_register());
     add_debug_info_for_null_check_here(op->stub()->info());
     __ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized);
@@ -2627,7 +2624,7 @@
   } else {
     bool need_slow_path = true;
     if (k->is_loaded()) {
-      if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+      if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset()))
         need_slow_path = false;
       // perform the fast part of the checking logic
       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
@@ -2731,7 +2728,7 @@
     __ load_klass(value, klass_RInfo);
 
     // get instance klass
-    __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo);
+    __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo);
     // perform the fast part of the checking logic
     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL);
 
--- a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -181,7 +181,7 @@
 void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
   assert_different_registers(obj, klass, len, t1, t2);
   if (UseBiasedLocking && !len->is_valid()) {
-    ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1);
+    ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1);
   } else {
     set((intx)markOopDesc::prototype(), t1);
   }
@@ -252,7 +252,7 @@
 #ifdef ASSERT
   {
     Label ok;
-    ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1);
+    ld(klass, in_bytes(Klass::layout_helper_offset()), t1);
     if (var_size_in_bytes != noreg) {
       cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok);
     } else {
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -398,14 +398,14 @@
 
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
-            __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1);
+            __ ldub(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1);
             __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path);
           }
 #ifdef ASSERT
           // assert object can be fast path allocated
           {
             Label ok, not_ok;
-          __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
           // make sure it's an instance (LH > 0)
           __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok);
           __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
@@ -425,7 +425,7 @@
           __ bind(retry_tlab);
 
           // get the instance size
-          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
 
           __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
 
@@ -437,7 +437,7 @@
 
           __ bind(try_eden);
           // get the instance size
-          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
           __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
           __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
 
@@ -471,8 +471,7 @@
         Register G4_length = G4; // Incoming
         Register O0_obj   = O0; // Outgoing
 
-        Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize)
-                                    + Klass::layout_helper_offset_in_bytes()));
+        Address klass_lh(G5_klass, Klass::layout_helper_offset());
         assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
         assert(Klass::_lh_header_size_mask == 0xFF, "bytewise");
         // Use this offset to pick out an individual byte of the layout_helper:
@@ -592,7 +591,7 @@
         Label register_finalizer;
         Register t = O1;
         __ load_klass(O0, t);
-        __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t);
+        __ ld(t, in_bytes(Klass::access_flags_offset()), t);
         __ set(JVM_ACC_HAS_FINALIZER, G3);
         __ andcc(G3, t, G0);
         __ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
--- a/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -766,7 +766,7 @@
       // get native function entry point(O0 is a good temp until the very end)
        ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0);
     // for static methods insert the mirror argument
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
     __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1);
     __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1);
@@ -1173,7 +1173,7 @@
     __ btst(JVM_ACC_SYNCHRONIZED, O1);
     __ br( Assembler::zero, false, Assembler::pt, done);
 
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ delayed()->btst(JVM_ACC_STATIC, O1);
     __ ld_ptr(XXX_STATE(_locals), O1);
     __ br( Assembler::zero, true, Assembler::pt, got_obj);
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1098,7 +1098,7 @@
   Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
   Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
 
-  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
 
   if (have_entry(ek)) {
     __ nop();  // empty stubs make SG sick
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Jul 05 18:00:12 2017 +0200
@@ -6773,6 +6773,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Register Move Instructions-----------------------------------------
 instruct roundDouble_nop(regD dst) %{
   match(Set dst (RoundDouble dst));
@@ -9273,6 +9283,7 @@
 // (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
 instruct jumpXtnd(iRegX switch_val, o7RegI table) %{
   match(Jump switch_val);
+  effect(TEMP table);
 
   ins_cost(350);
 
@@ -10263,24 +10274,24 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastLock object box));
 
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Lock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
 
 
-instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastUnlock object box));
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTUNLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -3046,8 +3046,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                    Klass::layout_helper_offset_in_bytes();
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
 
     // Load 32-bits signed value. Use br() instruction with it to check icc.
     __ lduw(G3_src_klass, lh_offset, G5_lh);
@@ -3194,15 +3193,13 @@
                                  G4_dst_klass, G3_src_klass);
 
       // Generate the type check.
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ lduw(G4_dst_klass, sco_offset, sco_temp);
       generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                           O5_temp, L_plain_copy);
 
       // Fetch destination element klass from the objArrayKlass header.
-      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                       objArrayKlass::element_klass_offset_in_bytes());
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
 
       // the checkcast_copy loop needs two extra arguments:
       __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
@@ -3414,6 +3411,9 @@
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
                                G5_method_type, G3_method_handle);
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
 
 
@@ -3427,7 +3427,6 @@
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
-    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
 
     StubRoutines::_handler_for_unsafe_access_entry =
       generate_handler_for_unsafe_access();
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -366,7 +366,7 @@
 
   // get synchronization object to O0
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ btst(JVM_ACC_STATIC, O0);
     __ br( Assembler::zero, true, Assembler::pt, done);
     __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
@@ -396,7 +396,6 @@
                                                          Register Rscratch,
                                                          Register Rscratch2) {
   const int page_size = os::vm_page_size();
-  Address saved_exception_pc(G2_thread, JavaThread::saved_exception_pc_offset());
   Label after_frame_check;
 
   assert_different_registers(Rframe_size, Rscratch, Rscratch2);
@@ -436,11 +435,19 @@
   // the bottom of the stack
   __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
 
-  // Save the return address as the exception pc
-  __ st_ptr(O7, saved_exception_pc);
+  // the stack will overflow, throw an exception
+
+  // Note that SP is restored to sender's sp (in the delay slot). This
+  // is necessary if the sender's frame is an extended compiled frame
+  // (see gen_c2i_adapter()) and safer anyway in case of JSR292
+  // adaptations.
 
-  // the stack will overflow, throw an exception
-  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  // Note also that the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry());
+  __ jump_to(stub, Rscratch);
+  __ delayed()->mov(O5_savedSP, SP);
 
   // if you get to here, then there is enough stack space
   __ bind( after_frame_check );
@@ -984,7 +991,7 @@
     // get native function entry point(O0 is a good temp until the very end)
     __ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0);
     // for static methods insert the mirror argument
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
     __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1);
     __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -888,7 +888,7 @@
 
   // do fast instanceof cache test
 
-  __ ld_ptr(O4,     sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(),  O4);
+  __ ld_ptr(O4,     in_bytes(objArrayKlass::element_klass_offset()),  O4);
 
   assert(Otos_i == O0, "just checking");
 
@@ -2031,7 +2031,7 @@
     __ access_local_ptr(G3_scratch, Otos_i);
     __ load_klass(Otos_i, O2);
     __ set(JVM_ACC_HAS_FINALIZER, G3);
-    __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2);
+    __ ld(O2, in_bytes(Klass::access_flags_offset()), O2);
     __ andcc(G3, O2, G0);
     Label skip_register_finalizer;
     __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer);
@@ -3350,13 +3350,13 @@
   __ ld_ptr(Rscratch, Roffset, RinstanceKlass);
 
   // make sure klass is fully initialized:
-  __ ld(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch);
+  __ ldub(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch);
   __ cmp(G3_scratch, instanceKlass::fully_initialized);
   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
-  __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+  __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
 
   // get instance_size in instanceKlass (already aligned)
-  //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+  //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
 
   // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
@@ -3483,7 +3483,7 @@
   __ bind(initialize_header);
 
   if (UseBiasedLocking) {
-    __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch);
+    __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch);
   } else {
     __ set((intptr_t)markOopDesc::prototype(), G4_scratch);
   }
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -533,6 +533,19 @@
 
   case 0x0F: // movx..., etc.
     switch (0xFF & *ip++) {
+    case 0x3A: // pcmpestri
+      tail_size = 1;
+    case 0x38: // ptest, pmovzxbw
+      ip++; // skip opcode
+      debug_only(has_disp32 = true); // has both kinds of operands!
+      break;
+
+    case 0x70: // pshufd r, r/a, #8
+      debug_only(has_disp32 = true); // has both kinds of operands!
+    case 0x73: // psrldq r, #8
+      tail_size = 1;
+      break;
+
     case 0x12: // movlps
     case 0x28: // movaps
     case 0x2E: // ucomiss
@@ -543,9 +556,7 @@
     case 0x57: // xorps
     case 0x6E: // movd
     case 0x7E: // movd
-    case 0xAE: // ldmxcsr   a
-      // 64bit side says it these have both operands but that doesn't
-      // appear to be true
+    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
       debug_only(has_disp32 = true);
       break;
 
@@ -565,6 +576,12 @@
       // fall out of the switch to decode the address
       break;
 
+    case 0xC4: // pinsrw r, a, #8
+      debug_only(has_disp32 = true);
+    case 0xC5: // pextrw r, r, #8
+      tail_size = 1;  // the imm8
+      break;
+
     case 0xAC: // shrd r, a, #8
       debug_only(has_disp32 = true);
       tail_size = 1;  // the imm8
@@ -625,11 +642,44 @@
     tail_size = 1; // the imm8
     break;
 
-  case 0xE8: // call rdisp32
-  case 0xE9: // jmp  rdisp32
-    if (which == end_pc_operand)  return ip + 4;
-    assert(which == call32_operand, "call has no disp32 or imm");
-    return ip;
+  case 0xC4: // VEX_3bytes
+  case 0xC5: // VEX_2bytes
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    assert(ip == inst+1, "no prefixes allowed");
+    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
+    // but they have prefix 0x0F and processed when 0x0F processed above.
+    //
+    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
+    // instructions (these instructions are not supported in 64-bit mode).
+    // To distinguish them bits [7:6] are set in the VEX second byte since
+    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
+    // those VEX bits REX and vvvv bits are inverted.
+    //
+    // Fortunately C2 doesn't generate these instructions so we don't need
+    // to check for them in product version.
+
+    // Check second byte
+    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
+
+    // First byte
+    if ((0xFF & *inst) == VEX_3bytes) {
+      ip++; // third byte
+      is_64bit = ((VEX_W & *ip) == VEX_W);
+    }
+    ip++; // opcode
+    // To find the end of instruction (which == end_pc_operand).
+    switch (0xFF & *ip) {
+    case 0x61: // pcmpestri r, r/a, #8
+    case 0x70: // pshufd r, r/a, #8
+    case 0x73: // psrldq r, #8
+      tail_size = 1;  // the imm8
+      break;
+    default:
+      break;
+    }
+    ip++; // skip opcode
+    debug_only(has_disp32 = true); // has both kinds of operands!
+    break;
 
   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
@@ -643,6 +693,12 @@
     debug_only(has_disp32 = true);
     break;
 
+  case 0xE8: // call rdisp32
+  case 0xE9: // jmp  rdisp32
+    if (which == end_pc_operand)  return ip + 4;
+    assert(which == call32_operand, "call has no disp32 or imm");
+    return ip;
+
   case 0xF0:                    // Lock
     assert(os::is_MP(), "only on MP");
     goto again_after_prefix;
@@ -918,9 +974,7 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x58);
   emit_byte(0xC0 | encode);
 }
@@ -928,18 +982,14 @@
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x58);
   emit_operand(dst, src);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x58);
   emit_byte(0xC0 | encode);
 }
@@ -947,13 +997,19 @@
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x58);
   emit_operand(dst, src);
 }
 
+void Assembler::andl(Address dst, int32_t imm32) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_byte(0x81);
+  emit_operand(rsp, dst, 4);
+  emit_long(imm32);
+}
+
 void Assembler::andl(Register dst, int32_t imm32) {
   prefix(dst);
   emit_arith(0x81, 0xE0, dst, imm32);
@@ -974,13 +1030,33 @@
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x54);
   emit_operand(dst, src);
 }
 
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x54);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
+  emit_byte(0x54);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1025,19 +1101,7 @@
 }
 
 void Assembler::call(Register dst) {
-  // This was originally using a 32bit register encoding
-  // and surely we want 64bit!
-  // this is a 32bit encoding but in 64bit mode the default
-  // operand size is 64bit so there is no need for the
-  // wide prefix. So prefix only happens if we use the
-  // new registers. Much like push/pop.
-  int x = offset();
-  // this may be true but dbx disassembles it as if it
-  // were 32bits...
-  // int encode = prefix_and_encode(dst->encoding());
-  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
-  int encode = prefixq_and_encode(dst->encoding());
-
+  int encode = prefix_and_encode(dst->encoding());
   emit_byte(0xFF);
   emit_byte(0xD0 | encode);
 }
@@ -1157,87 +1221,119 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  comiss(dst, src);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x2F);
+  emit_operand(dst, src);
+}
+
+void Assembler::comisd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+  emit_byte(0x2F);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2F);
   emit_operand(dst, src);
 }
 
+void Assembler::comiss(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
+  emit_byte(0x2F);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0xE6);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x5B);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x5A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtss2sd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x5A);
+  emit_operand(dst, src);
+}
+
+
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
@@ -1253,18 +1349,14 @@
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5E);
   emit_operand(dst, src);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5E);
   emit_byte(0xC0 | encode);
 }
@@ -1272,18 +1364,14 @@
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5E);
   emit_operand(dst, src);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5E);
   emit_byte(0xC0 | encode);
 }
@@ -1377,8 +1465,14 @@
   if (L.is_bound()) {
     const int short_size = 2;
     address entry = target(L);
-    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
-           "Dispacement too large for a short jmp");
+#ifdef ASSERT
+    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t delta = short_branch_delta();
+    if (delta != 0) {
+      dist += (dist < 0 ? (-delta) :delta);
+    }
+    assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
     // 0111 tttn #8-bit disp
     emit_byte(0x70 | cc);
@@ -1444,9 +1538,15 @@
   if (L.is_bound()) {
     const int short_size = 2;
     address entry = target(L);
-    assert(is8bit((entry - _code_pos) + short_size),
-           "Dispacement too large for a short jmp");
     assert(entry != NULL, "jmp most probably wrong");
+#ifdef ASSERT
+    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t delta = short_branch_delta();
+    if (delta != 0) {
+      dist += (dist < 0 ? (-delta) :delta);
+    }
+    assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
     intptr_t offs = entry - _code_pos;
     emit_byte(0xEB);
     emit_byte((offs - short_size) & 0xFF);
@@ -1509,49 +1609,16 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int dstenc = dst->encoding();
-  int srcenc = src->encoding();
-  emit_byte(0x66);
-  if (dstenc < 8) {
-    if (srcenc >= 8) {
-      prefix(REX_B);
-      srcenc -= 8;
-    }
-  } else {
-    if (srcenc < 8) {
-      prefix(REX_R);
-    } else {
-      prefix(REX_RB);
-      srcenc -= 8;
-    }
-    dstenc -= 8;
-  }
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x28);
-  emit_byte(0xC0 | dstenc << 3 | srcenc);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int dstenc = dst->encoding();
-  int srcenc = src->encoding();
-  if (dstenc < 8) {
-    if (srcenc >= 8) {
-      prefix(REX_B);
-      srcenc -= 8;
-    }
-  } else {
-    if (srcenc < 8) {
-      prefix(REX_R);
-    } else {
-      prefix(REX_RB);
-      srcenc -= 8;
-    }
-    dstenc -= 8;
-  }
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x28);
-  emit_byte(0xC0 | dstenc << 3 | srcenc);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::movb(Register dst, Address src) {
@@ -1582,19 +1649,15 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
   // swap src/dst to get correct prefix
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
   emit_byte(0x7E);
   emit_byte(0xC0 | encode);
 }
@@ -1602,58 +1665,29 @@
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_operand(dst, src);
 }
 
-
-void Assembler::movdqa(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
-}
-
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x6F);
   emit_byte(0xC0 | encode);
 }
 
-void Assembler::movdqa(Address dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0x7F);
-  emit_operand(src, dst);
-}
-
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0x6F);
   emit_byte(0xC0 | encode);
 }
@@ -1661,9 +1695,7 @@
 void Assembler::movdqu(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x7F);
   emit_operand(src, dst);
 }
@@ -1710,9 +1742,7 @@
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x12);
   emit_operand(dst, src);
 }
@@ -1740,9 +1770,7 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x7E);
   emit_operand(dst, src);
 }
@@ -1750,9 +1778,7 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0xD6);
   emit_operand(src, dst);
 }
@@ -1775,9 +1801,7 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x10);
   emit_byte(0xC0 | encode);
 }
@@ -1785,9 +1809,7 @@
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x10);
   emit_operand(dst, src);
 }
@@ -1795,18 +1817,14 @@
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x10);
   emit_byte(0xC0 | encode);
 }
@@ -1814,9 +1832,7 @@
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x10);
   emit_operand(dst, src);
 }
@@ -1824,9 +1840,7 @@
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x11);
   emit_operand(src, dst);
 }
@@ -1919,18 +1933,14 @@
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x59);
   emit_operand(dst, src);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x59);
   emit_byte(0xC0 | encode);
 }
@@ -1938,18 +1948,14 @@
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x59);
   emit_operand(dst, src);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x59);
   emit_byte(0xC0 | encode);
 }
@@ -2237,14 +2243,26 @@
   emit_arith(0x0B, 0xC0, dst, src);
 }
 
+void Assembler::packuswb(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x67);
+  emit_operand(dst, src);
+}
+
+void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x67);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x3A);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_operand(dst, src);
   emit_byte(imm8);
@@ -2252,16 +2270,27 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x3A);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
 }
 
+void Assembler::pmovzxbw(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x30);
+  emit_operand(dst, src);
+}
+
+void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x30);
+  emit_byte(0xC0 | encode);
+}
+
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
@@ -2360,22 +2389,24 @@
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0x66);
-  int  encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEB);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::por(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0xEB);
+  emit_operand(dst, src);
+}
+
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x70);
   emit_byte(0xC0 | encode);
   emit_byte(mode & 0xFF);
@@ -2385,11 +2416,9 @@
 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0x70);
   emit_operand(dst, src);
   emit_byte(mode & 0xFF);
@@ -2398,10 +2427,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
   emit_byte(0x70);
   emit_byte(0xC0 | encode);
   emit_byte(mode & 0xFF);
@@ -2410,11 +2436,9 @@
 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst); // QQ new
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x70);
   emit_operand(dst, src);
   emit_byte(mode & 0xFF);
@@ -2425,11 +2449,8 @@
   // HMM Table D-1 says sse2 or mmx.
   // Do not confuse it with psrldq SSE2 instruction which
   // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
-  emit_byte(0x66);
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
   emit_byte(0x73);
   emit_byte(0xC0 | encode);
   emit_byte(shift);
@@ -2438,10 +2459,7 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
-  emit_byte(0x66);
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
   emit_byte(0x73);
   emit_byte(0xC0 | encode);
   emit_byte(shift);
@@ -2449,36 +2467,52 @@
 
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x38);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x38);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpcklbw(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x60);
+  emit_operand(dst, src);
+}
+
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x60);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpckldq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x62);
+  emit_operand(dst, src);
+}
+
+void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x62);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
@@ -2508,20 +2542,16 @@
 
 void Assembler::pxor(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEF);
   emit_operand(dst, src);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEF);
   emit_byte(0xC0 | encode);
 }
@@ -2683,12 +2713,8 @@
 }
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
-  // HMM Table D-1 says sse2
-  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x51);
   emit_byte(0xC0 | encode);
 }
@@ -2696,30 +2722,22 @@
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x51);
   emit_operand(dst, src);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
-  // HMM Table D-1 says sse2
-  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x51);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x51);
   emit_operand(dst, src);
 }
@@ -2765,9 +2783,7 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5C);
   emit_byte(0xC0 | encode);
 }
@@ -2775,18 +2791,14 @@
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5C);
   emit_operand(dst, src);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5C);
   emit_byte(0xC0 | encode);
 }
@@ -2794,9 +2806,7 @@
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5C);
   emit_operand(dst, src);
 }
@@ -2836,30 +2846,30 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  ucomiss(dst, src);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  ucomiss(dst, src);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+  emit_byte(0x2E);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2E);
   emit_operand(dst, src);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2E);
   emit_byte(0xC0 | encode);
 }
@@ -2905,16 +2915,15 @@
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  xorps(dst, src);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x57);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x57);
   emit_operand(dst, src);
 }
@@ -2922,8 +2931,7 @@
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
   emit_byte(0x57);
   emit_byte(0xC0 | encode);
 }
@@ -2931,12 +2939,166 @@
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
   emit_byte(0x57);
   emit_operand(dst, src);
 }
 
+// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x58);
+  emit_operand(dst, src);
+}
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x58);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x58);
+  emit_operand(dst, src);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x58);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5E);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5E);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x59);
+  emit_operand(dst, src);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x59);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x59);
+  emit_operand(dst, src);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x59);
+  emit_byte(0xC0 | encode);
+}
+
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5C);
+  emit_operand(dst, src);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5C);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5C);
+  emit_operand(dst, src);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5C);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
+  emit_byte(0x57);
+  emit_operand(dst, src);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+  emit_byte(0x57);
+  emit_operand(dst, src);
+}
+
+
 #ifndef _LP64
 // 32bit only pieces of the assembler
 
@@ -3394,12 +3556,114 @@
   emit_byte(0xF1);
 }
 
+// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
+static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
+// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
+static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
+
+// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
+void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
+  if (pre > 0) {
+    emit_byte(simd_pre[pre]);
+  }
+  if (rex_w) {
+    prefixq(adr, xreg);
+  } else {
+    prefix(adr, xreg);
+  }
+  if (opc > 0) {
+    emit_byte(0x0F);
+    int opc2 = simd_opc[opc];
+    if (opc2 > 0) {
+      emit_byte(opc2);
+    }
+  }
+}
+
+int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
+  if (pre > 0) {
+    emit_byte(simd_pre[pre]);
+  }
+  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
+                          prefix_and_encode(dst_enc, src_enc);
+  if (opc > 0) {
+    emit_byte(0x0F);
+    int opc2 = simd_opc[opc];
+    if (opc2 > 0) {
+      emit_byte(opc2);
+    }
+  }
+  return encode;
+}
+
+
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
+  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
+    prefix(VEX_3bytes);
+
+    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
+    byte1 = (~byte1) & 0xE0;
+    byte1 |= opc;
+    a_byte(byte1);
+
+    int byte2 = ((~nds_enc) & 0xf) << 3;
+    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
+    emit_byte(byte2);
+  } else {
+    prefix(VEX_2bytes);
+
+    int byte1 = vex_r ? VEX_R : 0;
+    byte1 = (~byte1) & 0x80;
+    byte1 |= ((~nds_enc) & 0xf) << 3;
+    byte1 |= (vector256 ? 4 : 0) | pre;
+    emit_byte(byte1);
+  }
+}
+
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
+  bool vex_r = (xreg_enc >= 8);
+  bool vex_b = adr.base_needs_rex();
+  bool vex_x = adr.index_needs_rex();
+  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
+  bool vex_r = (dst_enc >= 8);
+  bool vex_b = (src_enc >= 8);
+  bool vex_x = false;
+  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+  return (((dst_enc & 7) << 3) | (src_enc & 7));
+}
+
+
+void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+  if (UseAVX > 0) {
+    int xreg_enc = xreg->encoding();
+    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
+  } else {
+    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
+    rex_prefix(adr, xreg, pre, opc, rex_w);
+  }
+}
+
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (UseAVX > 0) {
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
+  } else {
+    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
+    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
+  }
+}
 
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
- emit_byte(0x40 | dst->encoding());
+  emit_byte(0x40 | dst->encoding());
 }
 
 void Assembler::lea(Register dst, Address src) {
@@ -3756,6 +4020,38 @@
   }
 }
 
+void Assembler::prefixq(Address adr, XMMRegister src) {
+  if (src->encoding() < 8) {
+    if (adr.base_needs_rex()) {
+      if (adr.index_needs_rex()) {
+        prefix(REX_WXB);
+      } else {
+        prefix(REX_WB);
+      }
+    } else {
+      if (adr.index_needs_rex()) {
+        prefix(REX_WX);
+      } else {
+        prefix(REX_W);
+      }
+    }
+  } else {
+    if (adr.base_needs_rex()) {
+      if (adr.index_needs_rex()) {
+        prefix(REX_WRXB);
+      } else {
+        prefix(REX_WRB);
+      }
+    } else {
+      if (adr.index_needs_rex()) {
+        prefix(REX_WRX);
+      } else {
+        prefix(REX_WR);
+      }
+    }
+  }
+}
+
 void Assembler::adcq(Register dst, int32_t imm32) {
   (void) prefixq_and_encode(dst->encoding());
   emit_arith(0x81, 0xD0, dst, imm32);
@@ -3918,36 +4214,44 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
@@ -4107,21 +4411,17 @@
 
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
-  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::movdq(Register dst, XMMRegister src) {
   // table D-1 says MMX/SSE2
-  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
-  emit_byte(0x66);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
-  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
   emit_byte(0x7E);
   emit_byte(0xC0 | encode);
 }
@@ -4632,7 +4932,7 @@
     null_check_offset = offset();
   }
   movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
   if (need_tmp_reg) {
     pop(tmp_reg);
@@ -4719,7 +5019,7 @@
   }
   get_thread(tmp_reg);
   movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
   movl(swap_reg, saved_mark_addr);
   if (os::is_MP()) {
     lock();
@@ -4757,7 +5057,7 @@
     push(tmp_reg);
   }
   movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   if (os::is_MP()) {
     lock();
   }
@@ -5680,6 +5980,24 @@
   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
 }
 
+void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::addsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::addsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    addss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    addss(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::align(int modulus) {
   if (offset() % modulus != 0) {
     nop(modulus - (offset() % modulus));
@@ -5687,11 +6005,24 @@
 }
 
 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
-    andpd(dst, as_Address(src));
+    Assembler::andpd(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    andpd(dst, Address(rscratch1, 0));
+    Assembler::andpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::andps(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::andps(dst, Address(rscratch1, 0));
   }
 }
 
@@ -6270,19 +6601,19 @@
 
 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    comisd(dst, as_Address(src));
+    Assembler::comisd(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    comisd(dst, Address(rscratch1, 0));
+    Assembler::comisd(dst, Address(rscratch1, 0));
   }
 }
 
 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    comiss(dst, as_Address(src));
+    Assembler::comiss(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    comiss(dst, Address(rscratch1, 0));
+    Assembler::comiss(dst, Address(rscratch1, 0));
   }
 }
 
@@ -6366,6 +6697,24 @@
   sarl(reg, shift_value);
 }
 
+void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::divsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::divsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::divss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::divss(dst, Address(rscratch1, 0));
+  }
+}
+
 // !defined(COMPILER2) is because of stupid core builds
 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
 void MacroAssembler::empty_FPU_stack() {
@@ -6805,12 +7154,39 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movsd(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    movss(dst, as_Address(src));
+    Assembler::movss(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    movss(dst, Address(rscratch1, 0));
+    Assembler::movss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulss(dst, Address(rscratch1, 0));
   }
 }
 
@@ -6992,6 +7368,193 @@
   testl(dst, as_Address(src));
 }
 
+void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::sqrtsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::sqrtsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::sqrtss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::sqrtss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::subsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::subsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::subss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::subss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::ucomisd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::ucomisd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::ucomiss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::ucomiss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::xorpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorps(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::xorps(dst, Address(rscratch1, 0));
+  }
+}
+
+// AVX 3-operands instructions
+
+void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vaddsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vaddsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vaddss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vaddss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vandpd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vandpd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vandps(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vandps(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vdivsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vdivsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vdivss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vdivss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vmulsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vmulsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vmulss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vmulss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vsubsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vsubsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vsubss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vsubss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vxorpd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vxorpd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vxorps(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vxorps(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+
 //////////////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
@@ -7430,6 +7993,16 @@
                                           Register var_size_in_bytes,
                                           int con_size_in_bytes,
                                           Register t1) {
+  if (!thread->is_valid()) {
+#ifdef _LP64
+    thread = r15_thread;
+#else
+    assert(t1->is_valid(), "need temp reg");
+    thread = t1;
+    get_thread(thread);
+#endif
+  }
+
 #ifdef _LP64
   if (var_size_in_bytes->is_valid()) {
     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
@@ -7437,12 +8010,6 @@
     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
   }
 #else
-  if (!thread->is_valid()) {
-    assert(t1->is_valid(), "need temp reg");
-    thread = t1;
-    get_thread(thread);
-  }
-
   if (var_size_in_bytes->is_valid()) {
     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
   } else {
@@ -7685,10 +8252,8 @@
   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
-  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                    Klass::super_check_offset_offset_in_bytes());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Address super_check_offset_addr(super_klass, sco_offset);
 
   // Hacked jcc, which "knows" that L_fallthrough, at least, is in
@@ -7786,10 +8351,8 @@
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
   // a couple of useful fields in sub_klass:
-  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_supers_offset_in_bytes());
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
   Address secondary_supers_addr(sub_klass, ss_offset);
   Address super_cache_addr(     sub_klass, sc_offset);
 
@@ -7876,32 +8439,6 @@
 }
 
 
-void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
-  ucomisd(dst, as_Address(src));
-}
-
-void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
-  ucomiss(dst, as_Address(src));
-}
-
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    xorpd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    xorpd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    xorps(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    xorps(dst, Address(rscratch1, 0));
-  }
-}
-
 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
   if (VM_Version::supports_cmov()) {
     cmovl(cc, dst, src);
@@ -8487,20 +9024,20 @@
     if (Universe::narrow_oop_shift() != 0) {
       assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       if (LogMinObjAlignmentInBytes == Address::times_8) {
-        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
       } else {
         // OK to use shift since we don't need to preserve flags.
         shlq(dst, LogMinObjAlignmentInBytes);
-        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
       }
     } else {
-      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      movq(dst, Address(dst, Klass::prototype_header_offset()));
     }
   } else
 #endif
   {
     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    movptr(dst, Address(dst, Klass::prototype_header_offset()));
   }
 }
 
@@ -8761,6 +9298,7 @@
                                       Register cnt1, Register cnt2,
                                       int int_cnt2,  Register result,
                                       XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
 
   // This method uses pcmpestri inxtruction with bound registers
@@ -8890,9 +9428,9 @@
       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
     }
     // Need to reload strings pointers if not matched whole vector
-    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
     addptr(cnt2, 8);
-    jccb(Assembler::negative, SCAN_SUBSTR);
+    jcc(Assembler::negative, SCAN_SUBSTR);
     // Fall through if found full substring
 
   } // (int_cnt2 > 8)
@@ -8911,6 +9449,7 @@
                                     Register cnt1, Register cnt2,
                                     int int_cnt2,  Register result,
                                     XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
   //
   // int_cnt2 is length of small (< 8 chars) constant substring
@@ -9172,6 +9711,7 @@
 void MacroAssembler::string_compare(Register str1, Register str2,
                                     Register cnt1, Register cnt2, Register result,
                                     XMMRegister vec1) {
+  ShortBranchVerifier sbv(this);
   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
 
   // Compute the minimum of the string lengths and the
@@ -9308,6 +9848,7 @@
 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                         Register limit, Register result, Register chr,
                                         XMMRegister vec1, XMMRegister vec2) {
+  ShortBranchVerifier sbv(this);
   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
 
   int length_offset  = arrayOopDesc::length_offset_in_bytes();
@@ -9427,6 +9968,7 @@
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
+  ShortBranchVerifier sbv(this);
   assert_different_registers(to, value, count, rtmp);
   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
   Label L_fill_2_bytes, L_fill_4_bytes;
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -503,7 +503,31 @@
     REX_WR     = 0x4C,
     REX_WRB    = 0x4D,
     REX_WRX    = 0x4E,
-    REX_WRXB   = 0x4F
+    REX_WRXB   = 0x4F,
+
+    VEX_3bytes = 0xC4,
+    VEX_2bytes = 0xC5
+  };
+
+  enum VexPrefix {
+    VEX_B = 0x20,
+    VEX_X = 0x40,
+    VEX_R = 0x80,
+    VEX_W = 0x80
+  };
+
+  enum VexSimdPrefix {
+    VEX_SIMD_NONE = 0x0,
+    VEX_SIMD_66   = 0x1,
+    VEX_SIMD_F3   = 0x2,
+    VEX_SIMD_F2   = 0x3
+  };
+
+  enum VexOpcode {
+    VEX_OPCODE_NONE  = 0x0,
+    VEX_OPCODE_0F    = 0x1,
+    VEX_OPCODE_0F_38 = 0x2,
+    VEX_OPCODE_0F_3A = 0x3
   };
 
   enum WhichOperand {
@@ -546,12 +570,99 @@
   void prefixq(Address adr);
 
   void prefix(Address adr, Register reg,  bool byteinst = false);
+  void prefix(Address adr, XMMRegister reg);
   void prefixq(Address adr, Register reg);
-
-  void prefix(Address adr, XMMRegister reg);
+  void prefixq(Address adr, XMMRegister reg);
 
   void prefetch_prefix(Address src);
 
+  void rex_prefix(Address adr, XMMRegister xreg,
+                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+  int  rex_prefix_and_encode(int dst_enc, int src_enc,
+                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
+                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+                  bool vector256);
+
+  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
+                  VexSimdPrefix pre, VexOpcode opc,
+                  bool vex_w, bool vector256);
+
+  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
+                  VexSimdPrefix pre, bool vector256 = false) {
+     vex_prefix(src, nds->encoding(), dst->encoding(),
+                pre, VEX_OPCODE_0F, false, vector256);
+  }
+
+  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
+                             VexSimdPrefix pre, VexOpcode opc,
+                             bool vex_w, bool vector256);
+
+  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+                             VexSimdPrefix pre, bool vector256 = false) {
+     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                  pre, VEX_OPCODE_0F, false, vector256);
+  }
+
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
+                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                   bool rex_w = false, bool vector256 = false);
+
+  void simd_prefix(XMMRegister dst, Address src,
+                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    simd_prefix(dst, xnoreg, src, pre, opc);
+  }
+  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
+    simd_prefix(src, dst, pre);
+  }
+  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
+                     VexSimdPrefix pre) {
+    bool rex_w = true;
+    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
+  }
+
+
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                             bool rex_w = false, bool vector256 = false);
+
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
+  }
+
+  // Move/convert 32-bit integer value.
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
+                             VexSimdPrefix pre) {
+    // It is OK to cast from Register to XMMRegister to pass argument here
+    // since only encoding is used in simd_prefix_and_encode() and number of
+    // Gen and Xmm registers are the same.
+    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
+  }
+  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
+    return simd_prefix_and_encode(dst, xnoreg, src, pre);
+  }
+  int simd_prefix_and_encode(Register dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
+  }
+
+  // Move/convert 64-bit integer value.
+  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
+                               VexSimdPrefix pre) {
+    bool rex_w = true;
+    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
+  }
+  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
+    return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
+  }
+  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    bool rex_w = true;
+    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
+  }
+
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 
@@ -764,6 +875,7 @@
   void addss(XMMRegister dst, Address src);
   void addss(XMMRegister dst, XMMRegister src);
 
+  void andl(Address  dst, int32_t imm32);
   void andl(Register dst, int32_t imm32);
   void andl(Register dst, Address src);
   void andl(Register dst, Register src);
@@ -774,9 +886,11 @@
   void andq(Register dst, Register src);
 
   // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, Address src);
   void andpd(XMMRegister dst, XMMRegister src);
 
+  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
+  void andps(XMMRegister dst, XMMRegister src);
+
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -837,9 +951,11 @@
 
   // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
   void comisd(XMMRegister dst, Address src);
+  void comisd(XMMRegister dst, XMMRegister src);
 
   // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
   void comiss(XMMRegister dst, Address src);
+  void comiss(XMMRegister dst, XMMRegister src);
 
   // Identify processor type and features
   void cpuid() {
@@ -849,14 +965,19 @@
 
   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
+  void cvtsd2ss(XMMRegister dst, Address src);
 
   // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
   void cvtsi2sdl(XMMRegister dst, Register src);
+  void cvtsi2sdl(XMMRegister dst, Address src);
   void cvtsi2sdq(XMMRegister dst, Register src);
+  void cvtsi2sdq(XMMRegister dst, Address src);
 
   // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
   void cvtsi2ssl(XMMRegister dst, Register src);
+  void cvtsi2ssl(XMMRegister dst, Address src);
   void cvtsi2ssq(XMMRegister dst, Register src);
+  void cvtsi2ssq(XMMRegister dst, Address src);
 
   // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
   void cvtdq2pd(XMMRegister dst, XMMRegister src);
@@ -866,6 +987,7 @@
 
   // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
   void cvtss2sd(XMMRegister dst, XMMRegister src);
+  void cvtss2sd(XMMRegister dst, Address src);
 
   // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
   void cvttsd2sil(Register dst, Address src);
@@ -1140,8 +1262,6 @@
   void movdq(Register dst, XMMRegister src);
 
   // Move Aligned Double Quadword
-  void movdqa(Address     dst, XMMRegister src);
-  void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
   // Move Unaligned Double Quadword
@@ -1261,10 +1381,18 @@
   void orq(Register dst, Address src);
   void orq(Register dst, Register src);
 
+  // Pack with unsigned saturation
+  void packuswb(XMMRegister dst, XMMRegister src);
+  void packuswb(XMMRegister dst, Address src);
+
   // SSE4.2 string instructions
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
 
+  // SSE4.1 packed move
+  void pmovzxbw(XMMRegister dst, XMMRegister src);
+  void pmovzxbw(XMMRegister dst, Address src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
 #endif
@@ -1292,6 +1420,7 @@
 
   // POR - Bitwise logical OR
   void por(XMMRegister dst, XMMRegister src);
+  void por(XMMRegister dst, Address src);
 
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
@@ -1313,6 +1442,11 @@
 
   // Interleave Low Bytes
   void punpcklbw(XMMRegister dst, XMMRegister src);
+  void punpcklbw(XMMRegister dst, Address src);
+
+  // Interleave Low Doublewords
+  void punpckldq(XMMRegister dst, XMMRegister src);
+  void punpckldq(XMMRegister dst, Address src);
 
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
@@ -1429,6 +1563,13 @@
   void xchgq(Register reg, Address adr);
   void xchgq(Register dst, Register src);
 
+  // Get Value of Extended Control Register
+  void xgetbv() {
+    emit_byte(0x0F);
+    emit_byte(0x01);
+    emit_byte(0xD0);
+  }
+
   void xorl(Register dst, int32_t imm32);
   void xorl(Register dst, Address src);
   void xorl(Register dst, Register src);
@@ -1437,14 +1578,44 @@
   void xorq(Register dst, Register src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, Address src);
   void xorpd(XMMRegister dst, XMMRegister src);
 
   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, Address src);
   void xorps(XMMRegister dst, XMMRegister src);
 
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
+
+  // AVX 3-operands instructions (encoded with VEX prefix)
+  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
+  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
+  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
+  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+
+ protected:
+  // Next instructions require address alignment 16 bytes SSE mode.
+  // They should be called only from corresponding MacroAssembler instructions.
+  void andpd(XMMRegister dst, Address src);
+  void andps(XMMRegister dst, Address src);
+  void xorpd(XMMRegister dst, Address src);
+  void xorps(XMMRegister dst, Address src);
+
 };
 
 
@@ -2175,9 +2346,15 @@
   void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
   void andpd(XMMRegister dst, AddressLiteral src);
 
+  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, AddressLiteral src);
+
+  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
   void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
   void comiss(XMMRegister dst, AddressLiteral src);
 
+  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
   void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
   void comisd(XMMRegister dst, AddressLiteral src);
 
@@ -2211,62 +2388,62 @@
   void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
   void movss(XMMRegister dst, AddressLiteral src);
 
-  void movlpd(XMMRegister dst, Address src)      {Assembler::movlpd(dst, src); }
+  void movlpd(XMMRegister dst, Address src)    {Assembler::movlpd(dst, src); }
   void movlpd(XMMRegister dst, AddressLiteral src);
 
 public:
 
   void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
   void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
-  void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
+  void addsd(XMMRegister dst, AddressLiteral src);
 
   void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
   void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
-  void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
+  void addss(XMMRegister dst, AddressLiteral src);
 
   void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
   void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
-  void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
+  void divsd(XMMRegister dst, AddressLiteral src);
 
   void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
   void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
-  void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
+  void divss(XMMRegister dst, AddressLiteral src);
 
   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
   void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
-  void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
+  void movsd(XMMRegister dst, AddressLiteral src);
 
   void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
   void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
-  void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
+  void mulsd(XMMRegister dst, AddressLiteral src);
 
   void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
   void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
-  void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
+  void mulss(XMMRegister dst, AddressLiteral src);
 
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
-  void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
+  void sqrtsd(XMMRegister dst, AddressLiteral src);
 
   void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
   void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
-  void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
+  void sqrtss(XMMRegister dst, AddressLiteral src);
 
   void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
   void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
-  void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
+  void subsd(XMMRegister dst, AddressLiteral src);
 
   void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
   void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
-  void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
+  void subss(XMMRegister dst, AddressLiteral src);
 
   void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
-  void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
+  void ucomiss(XMMRegister dst, Address src)     { Assembler::ucomiss(dst, src); }
   void ucomiss(XMMRegister dst, AddressLiteral src);
 
   void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
-  void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
+  void ucomisd(XMMRegister dst, Address src)     { Assembler::ucomisd(dst, src); }
   void ucomisd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
@@ -2279,6 +2456,53 @@
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
+  // AVX 3-operands instructions
+
+  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+
   // Data
 
   void cmov32( Condition cc, Register dst, Address  src);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -86,6 +86,7 @@
 inline void Assembler::prefixq(Address adr, Register reg) {}
 
 inline void Assembler::prefix(Address adr, XMMRegister reg) {}
+inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
 #else
 inline void Assembler::emit_long64(jlong x) {
   *(jlong*) _code_pos = x;
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -320,7 +320,7 @@
     // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null.
     __ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes()));
     __ get_thread(tmp);
-    __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc)));
+    __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset()));
     __ pop(tmp2);
     __ pop(tmp);
     __ jcc(Assembler::notEqual, call_patch);
@@ -519,7 +519,7 @@
 
   __ load_klass(tmp_reg, src_reg);
 
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
   __ cmpl(ref_type_adr, REF_NONE);
   __ jcc(Assembler::equal, _continuation);
 
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1557,8 +1557,8 @@
 
 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
   if (op->init_check()) {
-    __ cmpl(Address(op->klass()->as_register(),
-                    instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)),
+    __ cmpb(Address(op->klass()->as_register(),
+                    instanceKlass::init_state_offset()),
             instanceKlass::fully_initialized);
     add_debug_info_for_null_check_here(op->stub()->info());
     __ jcc(Assembler::notEqual, *op->stub()->entry());
@@ -1730,7 +1730,7 @@
 #else
       __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
 #endif // _LP64
-      if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
+      if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
         __ jcc(Assembler::notEqual, *failure_target);
         // successful cast, fall through to profile or jump
       } else {
@@ -1842,7 +1842,7 @@
     __ load_klass(klass_RInfo, value);
 
     // get instance klass (it's already uncompressed)
-    __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+    __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset()));
     // perform the fast part of the checking logic
     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
@@ -3289,8 +3289,7 @@
           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
             __ load_klass(tmp, dst);
           }
-          int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-            Klass::layout_helper_offset_in_bytes();
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
           Address klass_lh_addr(tmp, lh_offset);
           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
           __ cmpl(klass_lh_addr, objArray_lh);
@@ -3307,9 +3306,9 @@
 
 #ifndef _LP64
         __ movptr(tmp, dst_klass_addr);
-        __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset()));
         __ push(tmp);
-        __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(tmp, Address(tmp, Klass::super_check_offset_offset()));
         __ push(tmp);
         __ push(length);
         __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
@@ -3333,15 +3332,15 @@
         // Allocate abi space for args but be sure to keep stack aligned
         __ subptr(rsp, 6*wordSize);
         __ load_klass(c_rarg3, dst);
-        __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset()));
         store_parameter(c_rarg3, 4);
-        __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset()));
         __ call(RuntimeAddress(copyfunc_addr));
         __ addptr(rsp, 6*wordSize);
 #else
         __ load_klass(c_rarg4, dst);
-        __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
-        __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset()));
+        __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
         __ call(RuntimeAddress(copyfunc_addr));
 #endif
 
--- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -150,7 +150,7 @@
   assert_different_registers(obj, klass, len);
   if (UseBiasedLocking && !len->is_valid()) {
     assert_different_registers(obj, klass, len, t1, t2);
-    movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    movptr(t1, Address(klass, Klass::prototype_header_offset()));
     movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1);
   } else {
     // This assumes that all prototype bits fit in an int32_t
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1011,7 +1011,7 @@
 
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
-            __ cmpl(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+            __ cmpb(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
             __ jcc(Assembler::notEqual, slow_path);
           }
 
@@ -1019,7 +1019,7 @@
           // assert object can be fast path allocated
           {
             Label ok, not_ok;
-            __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+            __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
             __ cmpl(obj_size, 0);  // make sure it's an instance (LH > 0)
             __ jcc(Assembler::lessEqual, not_ok);
             __ testl(obj_size, Klass::_lh_instance_slow_path_bit);
@@ -1040,7 +1040,7 @@
           __ bind(retry_tlab);
 
           // get the instance size (size is postive so movl is fine for 64bit)
-          __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
 
           __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
 
@@ -1052,7 +1052,7 @@
 
           __ bind(try_eden);
           // get the instance size (size is postive so movl is fine for 64bit)
-          __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
 
           __ eden_allocate(obj, obj_size, 0, t1, slow_path);
           __ incr_allocated_bytes(thread, obj_size, 0);
@@ -1119,7 +1119,7 @@
         {
           Label ok;
           Register t0 = obj;
-          __ movl(t0, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+          __ movl(t0, Address(klass, Klass::layout_helper_offset()));
           __ sarl(t0, Klass::_lh_array_tag_shift);
           int tag = ((id == new_type_array_id)
                      ? Klass::_lh_array_tag_type_value
@@ -1153,7 +1153,7 @@
 
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive movl does right thing on 64bit
-          __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(t1, Address(klass, Klass::layout_helper_offset()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
           assert(t1 == rcx, "fixed register usage");
@@ -1167,7 +1167,7 @@
           __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
 
           __ initialize_header(obj, klass, length, t1, t2);
-          __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
           assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
           assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
           __ andptr(t1, Klass::_lh_header_size_mask);
@@ -1180,7 +1180,7 @@
           __ bind(try_eden);
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive movl does right thing on 64bit
-          __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(t1, Address(klass, Klass::layout_helper_offset()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
           assert(t1 == rcx, "fixed register usage");
@@ -1195,7 +1195,7 @@
           __ incr_allocated_bytes(thread, arr_size, 0);
 
           __ initialize_header(obj, klass, length, t1, t2);
-          __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
           assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
           assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
           __ andptr(t1, Klass::_lh_header_size_mask);
@@ -1267,7 +1267,7 @@
         Label register_finalizer;
         Register t = rsi;
         __ load_klass(t, rax);
-        __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(t, Address(t, Klass::access_flags_offset()));
         __ testl(t, JVM_ACC_HAS_FINALIZER);
         __ jcc(Assembler::notZero, register_finalizer);
         __ ret(0);
--- a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -511,7 +511,7 @@
     // get synchronization object
 
     Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(locals, 0));                   // get receiver (assume this is frequent case)
@@ -763,7 +763,7 @@
 #endif // ASSERT
   // get synchronization object
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ movptr(rdi, STATE(_locals));                                     // prepare to get receiver (assume common case)
     __ testl(rax, JVM_ACC_STATIC);
@@ -1180,7 +1180,7 @@
 
   // pass mirror handle if static call
   { Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1160,7 +1160,7 @@
   Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
   Address vmarg;                // __ argument_address(vmargslot)
 
-  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
 
   if (have_entry(ek)) {
     __ nop();                   // empty stubs make SG sick
--- a/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -237,9 +237,21 @@
   int off = 0;
   u_char instr_0 = ubyte_at(off);
 
+  // See comment in Assembler::locate_operand() about VEX prefixes.
+  if (instr_0 == instruction_VEX_prefix_2bytes) {
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+    return 2;
+  }
+  if (instr_0 == instruction_VEX_prefix_3bytes) {
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+    return 3;
+  }
+
   // First check to see if we have a (prefixed or not) xor
-  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
-       instr_0 <= instruction_prefix_wide_hi) { // 0x4f
+  if (instr_0 >= instruction_prefix_wide_lo && // 0x40
+      instr_0 <= instruction_prefix_wide_hi) { // 0x4f
     off++;
     instr_0 = ubyte_at(off);
   }
@@ -256,13 +268,13 @@
     instr_0 = ubyte_at(off);
   }
 
-  if ( instr_0 == instruction_code_xmm_ss_prefix ||      // 0xf3
+  if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
        instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
     off++;
     instr_0 = ubyte_at(off);
   }
 
-  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
+  if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
        instr_0 <= instruction_prefix_wide_hi) { // 0x4f
     off++;
     instr_0 = ubyte_at(off);
--- a/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -287,6 +287,9 @@
     instruction_code_xmm_store          = 0x11,
     instruction_code_xmm_lpd            = 0x12,
 
+    instruction_VEX_prefix_2bytes       = Assembler::VEX_2bytes,
+    instruction_VEX_prefix_3bytes       = Assembler::VEX_3bytes,
+
     instruction_size                    = 4,
     instruction_offset                  = 0,
     data_offset                         = 2,
--- a/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -53,6 +53,7 @@
 REGISTER_DEFINITION(Register, r15);
 #endif // AMD64
 
+REGISTER_DEFINITION(XMMRegister, xnoreg);
 REGISTER_DEFINITION(XMMRegister, xmm0 );
 REGISTER_DEFINITION(XMMRegister, xmm1 );
 REGISTER_DEFINITION(XMMRegister, xmm2 );
@@ -115,6 +116,7 @@
 REGISTER_DEFINITION(Register, r15_thread);
 #endif // AMD64
 
+REGISTER_DEFINITION(MMXRegister, mnoreg );
 REGISTER_DEFINITION(MMXRegister, mmx0 );
 REGISTER_DEFINITION(MMXRegister, mmx1 );
 REGISTER_DEFINITION(MMXRegister, mmx2 );
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1374,8 +1374,7 @@
     //                                  L_success, L_failure, NULL);
     assert_different_registers(sub_klass, temp);
 
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
+    int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 
     // if the pointers are equal, we are done (e.g., String[] elements)
     __ cmpptr(sub_klass, super_klass_addr);
@@ -1787,8 +1786,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                    Klass::layout_helper_offset_in_bytes();
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
     Address src_klass_lh_addr(rcx_src_klass, lh_offset);
 
     // Handle objArrays completely differently...
@@ -1914,10 +1912,8 @@
     // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
     {
       // Handy offsets:
-      int  ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        objArrayKlass::element_klass_offset_in_bytes());
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int  ek_offset = in_bytes(objArrayKlass::element_klass_offset());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
 
       Register rsi_dst_klass = rsi;
       Register rdi_temp      = rdi;
@@ -2323,6 +2319,9 @@
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
                                rax, rcx);
+
+    // Build this early so it's available for the interpreter
+    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
 
 
@@ -2334,7 +2333,6 @@
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
-    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
 
     //------------------------------------------------------------------------------------------------------------------------
     // entry points that are platform specific
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2261,8 +2261,7 @@
     // The ckoff and ckval must be mutually consistent,
     // even though caller generates both.
     { Label L;
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ cmpl(ckoff, Address(ckval, sco_offset));
       __ jcc(Assembler::equal, L);
       __ stop("super_check_offset inconsistent");
@@ -2572,8 +2571,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    const int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                          Klass::layout_helper_offset_in_bytes();
+    const int lh_offset = in_bytes(Klass::layout_helper_offset());
 
     // Handle objArrays completely differently...
     const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
@@ -2722,15 +2720,13 @@
       assert_clean_int(count, sco_temp);
 
       // Generate the type check.
-      const int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                              Klass::super_check_offset_offset_in_bytes());
+      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
       assert_clean_int(sco_temp, rax);
       generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
 
       // Fetch destination element klass from the objArrayKlass header.
-      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                       objArrayKlass::element_klass_offset_in_bytes());
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
       __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
       __ movl(  sco_temp,      Address(r11_dst_klass, sco_offset));
       assert_clean_int(sco_temp, rax);
@@ -3072,6 +3068,13 @@
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
                                rax, rcx);
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_StackOverflowError));
   }
 
   void generate_all() {
@@ -3098,12 +3101,6 @@
                                                 SharedRuntime::
                                                 throw_NullPointerException_at_call));
 
-    StubRoutines::_throw_StackOverflowError_entry =
-      generate_throw_exception("StackOverflowError throw_exception",
-                               CAST_FROM_FN_PTR(address,
-                                                SharedRuntime::
-                                                throw_StackOverflowError));
-
     // entry points that are platform specific
     StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
     StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -522,9 +522,18 @@
 
   __ pop(rsi);  // get saved bcp / (c++ prev state ).
 
-  __ pop(rax);  // get return address
-  __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry()));
+  // Restore sender's sp as SP. This is necessary if the sender's
+  // frame is an extended compiled frame (see gen_c2i_adapter())
+  // and safer anyway in case of JSR292 adaptations.
 
+  __ pop(rax); // return address must be moved if SP is changed
+  __ mov(rsp, rsi);
+  __ push(rax);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
   // all done with frame size check
   __ bind(after_frame_check_pop);
   __ pop(rsi);
@@ -552,7 +561,7 @@
   #endif // ASSERT
   // get synchronization object
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0)));  // get receiver (assume this is frequent case)
@@ -1012,7 +1021,7 @@
 
   // pass mirror handle if static call
   { Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -467,8 +467,18 @@
   __ cmpptr(rsp, rax);
   __ jcc(Assembler::above, after_frame_check);
 
-  __ pop(rax); // get return address
-  __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry()));
+  // Restore sender's sp as SP. This is necessary if the sender's
+  // frame is an extended compiled frame (see gen_c2i_adapter())
+  // and safer anyway in case of JSR292 adaptations.
+
+  __ pop(rax); // return address must be moved if SP is changed
+  __ mov(rsp, r13);
+  __ push(rax);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
 
   // all done with frame size check
   __ bind(after_frame_check);
@@ -505,8 +515,7 @@
 
   // get synchronization object
   {
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
-                              Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     Label done;
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
@@ -1006,8 +1015,7 @@
   // pass mirror handle if static call
   {
     Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
-                              Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -980,7 +980,7 @@
   __ load_klass(rbx, rax);
   // Move superklass into EAX
   __ load_klass(rax, rdx);
-  __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
+  __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset()));
   // Compress array+index*wordSize+12 into a single register.  Frees ECX.
   __ lea(rdx, element_address);
 
@@ -2033,7 +2033,7 @@
     assert(state == vtos, "only valid state");
     __ movptr(rax, aaddress(0));
     __ load_klass(rdi, rax);
-    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
     __ jcc(Assembler::zero, skip_register_finalizer);
@@ -3188,11 +3188,11 @@
 
   // make sure klass is initialized & doesn't have finalizer
   // make sure klass is fully initialized
-  __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+  __ cmpb(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
   __ jcc(Assembler::notEqual, slow_case);
 
   // get instance_size in instanceKlass (scaled to a count of bytes)
-  __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
   // test to see if it has a finalizer or is malformed in some way
   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
   __ jcc(Assembler::notZero, slow_case);
@@ -3293,7 +3293,7 @@
     __ bind(initialize_header);
     if (UseBiasedLocking) {
       __ pop(rcx);   // get saved klass back in the register.
-      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
     } else {
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1004,8 +1004,7 @@
   // Move superklass into rax
   __ load_klass(rax, rdx);
   __ movptr(rax, Address(rax,
-                         sizeof(oopDesc) +
-                         objArrayKlass::element_klass_offset_in_bytes()));
+                         objArrayKlass::element_klass_offset()));
   // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
   __ lea(rdx, element_address);
 
@@ -2067,7 +2066,7 @@
     assert(state == vtos, "only valid state");
     __ movptr(c_rarg1, aaddress(0));
     __ load_klass(rdi, c_rarg1);
-    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
     __ jcc(Assembler::zero, skip_register_finalizer);
@@ -3235,16 +3234,15 @@
 
   // make sure klass is initialized & doesn't have finalizer
   // make sure klass is fully initialized
-  __ cmpl(Address(rsi,
-                  instanceKlass::init_state_offset_in_bytes() +
-                  sizeof(oopDesc)),
+  __ cmpb(Address(rsi,
+                  instanceKlass::init_state_offset()),
           instanceKlass::fully_initialized);
   __ jcc(Assembler::notEqual, slow_case);
 
   // get instance_size in instanceKlass (scaled to a count of bytes)
   __ movl(rdx,
           Address(rsi,
-                  Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+                  Klass::layout_helper_offset()));
   // test to see if it has a finalizer or is malformed in some way
   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
   __ jcc(Assembler::notZero, slow_case);
@@ -3337,7 +3335,7 @@
     // initialize object header only.
     __ bind(initialize_header);
     if (UseBiasedLocking) {
-      __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset()));
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
     } else {
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -50,7 +50,7 @@
 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
 
 static BufferBlob* stub_blob;
-static const int stub_size = 400;
+static const int stub_size = 550;
 
 extern "C" {
   typedef void (*getPsrInfo_stub_t)(void*);
@@ -73,7 +73,7 @@
     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 
     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label ext_cpuid1, ext_cpuid5, done;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
 
     StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
 #   define __ _masm->
@@ -229,14 +229,51 @@
     __ movl(Address(rsi, 8), rcx);
     __ movl(Address(rsi,12), rdx);
 
+    //
+    // Check if OS has enabled XGETBV instruction to access XCR0
+    // (OSXSAVE feature flag) and CPU supports AVX
+    //
+    __ andl(rcx, 0x18000000);
+    __ cmpl(rcx, 0x18000000);
+    __ jccb(Assembler::notEqual, sef_cpuid);
+
+    //
+    // XCR0, XFEATURE_ENABLED_MASK register
+    //
+    __ xorl(rcx, rcx);   // zero for XCR0 register
+    __ xgetbv();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rdx);
+
+    //
+    // cpuid(0x7) Structured Extended Features
+    //
+    __ bind(sef_cpuid);
+    __ movl(rax, 7);
+    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
+    __ jccb(Assembler::greater, ext_cpuid);
+
+    __ xorl(rcx, rcx);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+
+    //
+    // Extended cpuid(0x80000000)
+    //
+    __ bind(ext_cpuid);
     __ movl(rax, 0x80000000);
     __ cpuid();
     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
     __ jcc(Assembler::belowEqual, done);
     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid5);
+    __ jccb(Assembler::belowEqual, ext_cpuid7);
     //
     // Extended cpuid(0x80000008)
     //
@@ -249,6 +286,18 @@
     __ movl(Address(rsi,12), rdx);
 
     //
+    // Extended cpuid(0x80000007)
+    //
+    __ bind(ext_cpuid7);
+    __ movl(rax, 0x80000007);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
     // Extended cpuid(0x80000005)
     //
     __ bind(ext_cpuid5);
@@ -359,13 +408,19 @@
   if (UseSSE < 1)
     _cpuFeatures &= ~CPU_SSE;
 
+  if (UseAVX < 2)
+    _cpuFeatures &= ~CPU_AVX2;
+
+  if (UseAVX < 1)
+    _cpuFeatures &= ~CPU_AVX;
+
   if (logical_processors_per_package() == 1) {
     // HT processor could be installed on a system which doesn't support HT.
     _cpuFeatures &= ~CPU_HT;
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -379,27 +434,39 @@
                (supports_sse4_1() ? ", sse4.1" : ""),
                (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_popcnt() ? ", popcnt" : ""),
+               (supports_avx()    ? ", avx" : ""),
+               (supports_avx2()   ? ", avx2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
                (supports_sse4a()   ? ", sse4a": ""),
-               (supports_ht() ? ", ht": ""));
+               (supports_ht() ? ", ht": ""),
+               (supports_tsc() ? ", tsc": ""),
+               (supports_tscinv_bit() ? ", tscinvbit": ""),
+               (supports_tscinv() ? ", tscinv": ""));
   _features_str = strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
   // the command line requires.  I.e., you cannot set UseSSE to 2 on
   // older Pentiums which do not support it.
-  if( UseSSE > 4 ) UseSSE=4;
-  if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
+  if (UseSSE > 4) UseSSE=4;
+  if (UseSSE < 0) UseSSE=0;
+  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
-  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
+  if (!supports_sse3()) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
-  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
+  if (!supports_sse2()) // Drop to 1 if no SSE2 support
     UseSSE = MIN2((intx)1,UseSSE);
-  if( !supports_sse () ) // Drop to 0 if no SSE  support
+  if (!supports_sse ()) // Drop to 0 if no SSE  support
     UseSSE = 0;
 
+  if (UseAVX > 2) UseAVX=2;
+  if (UseAVX < 0) UseAVX=0;
+  if (!supports_avx2()) // Drop to 1 if no AVX2 support
+    UseAVX = MIN2((intx)1,UseAVX);
+  if (!supports_avx ()) // Drop to 0 if no AVX  support
+    UseAVX = 0;
+
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
   //
@@ -534,6 +601,9 @@
     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
       UsePopCountInstruction = true;
     }
+  } else if (UsePopCountInstruction) {
+    warning("POPCNT instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   }
 
 #ifdef COMPILER2
@@ -605,7 +675,11 @@
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
-    tty->print_cr("UseSSE=%d",UseSSE);
+    tty->print("UseSSE=%d",UseSSE);
+    if (UseAVX > 0) {
+      tty->print("  UseAVX=%d",UseAVX);
+    }
+    tty->cr();
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
       tty->print_cr(": no prefetching");
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,7 +78,10 @@
                sse4_2   : 1,
                         : 2,
                popcnt   : 1,
-                        : 8;
+                        : 3,
+               osxsave  : 1,
+               avx      : 1,
+                        : 3;
     } bits;
   };
 
@@ -168,6 +171,15 @@
     } bits;
   };
 
+  union ExtCpuid7Edx {
+    uint32_t value;
+    struct {
+      uint32_t               : 8,
+              tsc_invariance : 1,
+                             : 23;
+    } bits;
+  };
+
   union ExtCpuid8Ecx {
     uint32_t value;
     struct {
@@ -176,32 +188,75 @@
     } bits;
   };
 
-protected:
-   static int _cpu;
-   static int _model;
-   static int _stepping;
-   static int _cpuFeatures;     // features returned by the "cpuid" instruction
-                                // 0 if this instruction is not available
-   static const char* _features_str;
+  union SefCpuid7Eax {
+    uint32_t value;
+  };
+
+  union SefCpuid7Ebx {
+    uint32_t value;
+    struct {
+      uint32_t fsgsbase : 1,
+                        : 2,
+                   bmi1 : 1,
+                        : 1,
+                   avx2 : 1,
+                        : 2,
+                   bmi2 : 1,
+                        : 23;
+    } bits;
+  };
+
+  union XemXcr0Eax {
+    uint32_t value;
+    struct {
+      uint32_t x87 : 1,
+               sse : 1,
+               ymm : 1,
+                   : 29;
+    } bits;
+  };
 
-   enum {
-     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
-     CPU_CMOV   = (1 << 1),
-     CPU_FXSR   = (1 << 2),
-     CPU_HT     = (1 << 3),
-     CPU_MMX    = (1 << 4),
-     CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
-                                     // may not necessarily support other 3dnow instructions
-     CPU_SSE    = (1 << 6),
-     CPU_SSE2   = (1 << 7),
-     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
-     CPU_SSSE3  = (1 << 9),
-     CPU_SSE4A  = (1 << 10),
-     CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12),
-     CPU_POPCNT = (1 << 13),
-     CPU_LZCNT  = (1 << 14)
-   } cpuFeatureFlags;
+protected:
+  static int _cpu;
+  static int _model;
+  static int _stepping;
+  static int _cpuFeatures;     // features returned by the "cpuid" instruction
+                               // 0 if this instruction is not available
+  static const char* _features_str;
+
+  enum {
+    CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
+    CPU_CMOV   = (1 << 1),
+    CPU_FXSR   = (1 << 2),
+    CPU_HT     = (1 << 3),
+    CPU_MMX    = (1 << 4),
+    CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
+                                    // may not necessarily support other 3dnow instructions
+    CPU_SSE    = (1 << 6),
+    CPU_SSE2   = (1 << 7),
+    CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
+    CPU_SSSE3  = (1 << 9),
+    CPU_SSE4A  = (1 << 10),
+    CPU_SSE4_1 = (1 << 11),
+    CPU_SSE4_2 = (1 << 12),
+    CPU_POPCNT = (1 << 13),
+    CPU_LZCNT  = (1 << 14),
+    CPU_TSC    = (1 << 15),
+    CPU_TSCINV = (1 << 16),
+    CPU_AVX    = (1 << 17),
+    CPU_AVX2   = (1 << 18)
+  } cpuFeatureFlags;
+
+  enum {
+    // AMD
+    CPU_FAMILY_AMD_11H       = 17,
+    // Intel
+    CPU_FAMILY_INTEL_CORE    = 6,
+    CPU_MODEL_NEHALEM_EP     = 26,
+    CPU_MODEL_WESTMERE_EP    = 44,
+//  CPU_MODEL_IVYBRIDGE_EP   = ??, TODO - get real value
+    CPU_MODEL_SANDYBRIDGE_EP = 45
+  } cpuExtendedFamily;
 
   // cpuid information block.  All info derived from executing cpuid with
   // various function numbers is stored here.  Intel and AMD info is
@@ -228,6 +283,12 @@
     uint32_t     dcp_cpuid4_ecx; // unused currently
     uint32_t     dcp_cpuid4_edx; // unused currently
 
+    // cpuid function 7 (structured extended features)
+    SefCpuid7Eax sef_cpuid7_eax;
+    SefCpuid7Ebx sef_cpuid7_ebx;
+    uint32_t     sef_cpuid7_ecx; // unused currently
+    uint32_t     sef_cpuid7_edx; // unused currently
+
     // cpuid function 0xB (processor topology)
     // ecx = 0
     uint32_t     tpl_cpuidB0_eax;
@@ -270,11 +331,21 @@
     ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
     ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
 
+    // cpuid function 0x80000007
+    uint32_t     ext_cpuid7_eax; // reserved
+    uint32_t     ext_cpuid7_ebx; // reserved
+    uint32_t     ext_cpuid7_ecx; // reserved
+    ExtCpuid7Edx ext_cpuid7_edx; // tscinv
+
     // cpuid function 0x80000008
     uint32_t     ext_cpuid8_eax; // unused currently
     uint32_t     ext_cpuid8_ebx; // reserved
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved
+
+    // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
+    XemXcr0Eax   xem_xcr0_eax;
+    uint32_t     xem_xcr0_edx; // reserved
   };
 
   // The actual cpuid info block
@@ -286,19 +357,23 @@
     result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
+
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
     result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
+
   static uint32_t cpu_stepping() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
     return result;
   }
+
   static uint logical_processor_count() {
     uint result = threads_per_core();
     return result;
   }
+
   static uint32_t feature_flags() {
     uint32_t result = 0;
     if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
@@ -328,6 +403,18 @@
       result |= CPU_SSE4_2;
     if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
       result |= CPU_POPCNT;
+    if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
+        _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
+      result |= CPU_AVX;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
+        result |= CPU_AVX2;
+    }
+    if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
+      result |= CPU_TSC;
+    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
+      result |= CPU_TSCINV;
 
     // AMD features.
     if (is_amd()) {
@@ -350,12 +437,15 @@
   static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
   static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
   static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
+  static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
   static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
+  static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
   static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
+  static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
 
   // Initialization
   static void initialize();
@@ -382,7 +472,6 @@
   //
   static int  cpu_family()        { return _cpu;}
   static bool is_P6()             { return cpu_family() >= 6; }
-
   static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
   static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
 
@@ -447,14 +536,51 @@
   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
-  //
+  static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
+  static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
+  static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
+
+  // Intel features
+  static bool is_intel_family_core() { return is_intel() &&
+                                       extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
+
+  static bool is_intel_tsc_synched_at_init()  {
+    if (is_intel_family_core()) {
+      uint32_t ext_model = extended_cpu_model();
+      if (ext_model == CPU_MODEL_NEHALEM_EP   ||
+          ext_model == CPU_MODEL_WESTMERE_EP  ||
+// TODO   ext_model == CPU_MODEL_IVYBRIDGE_EP ||
+          ext_model == CPU_MODEL_SANDYBRIDGE_EP) {
+        // 2-socket invtsc support. EX versions with 4 sockets are not
+        // guaranteed to synchronize tscs at initialization via a double
+        // handshake. The tscs can be explicitly set in software.  Code
+        // that uses tsc values must be prepared for them to arbitrarily
+        // jump backward or forward.
+        return true;
+      }
+    }
+    return false;
+  }
+
   // AMD features
-  //
   static bool supports_3dnow_prefetch()    { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; }
   static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
   static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
   static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
 
+  static bool is_amd_Barcelona()  { return is_amd() &&
+                                           extended_cpu_family() == CPU_FAMILY_AMD_11H; }
+
+  // Intel and AMD newer cores support fast timestamps well
+  static bool supports_tscinv_bit() {
+    return (_cpuFeatures & CPU_TSCINV) != 0;
+  }
+  static bool supports_tscinv() {
+    return supports_tscinv_bit() &&
+           ( (is_amd() && !is_amd_Barcelona()) ||
+             is_intel_tsc_synched_at_init() );
+  }
+
   // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
   static bool has_fast_idiv()     { return is_intel() && cpu_family() == 6 &&
                                            supports_sse3() && _model != 0x1C; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,777 @@
+//
+// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// X86 Common Architecture Description File
+
+source %{
+  // Float masks come from different places depending on platform.
+#ifdef _LP64
+  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
+  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
+  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
+  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
+#else
+  static address float_signmask()  { return (address)float_signmask_pool; }
+  static address float_signflip()  { return (address)float_signflip_pool; }
+  static address double_signmask() { return (address)double_signmask_pool; }
+  static address double_signflip() { return (address)double_signflip_pool; }
+#endif
+%}
+
+// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
+
+instruct addF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst src));
+
+  format %{ "addss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst (LoadF src)));
+
+  format %{ "addss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst con));
+  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src1 src2));
+
+  format %{ "vaddss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src1 (LoadF src2)));
+
+  format %{ "vaddss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src con));
+
+  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst src));
+
+  format %{ "addsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst (LoadD src)));
+
+  format %{ "addsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst con));
+  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src1 src2));
+
+  format %{ "vaddsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src1 (LoadD src2)));
+
+  format %{ "vaddsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src con));
+
+  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst src));
+
+  format %{ "subss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst (LoadF src)));
+
+  format %{ "subss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst con));
+  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src1 src2));
+
+  format %{ "vsubss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src1 (LoadF src2)));
+
+  format %{ "vsubss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src con));
+
+  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst src));
+
+  format %{ "subsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst (LoadD src)));
+
+  format %{ "subsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst con));
+  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src1 src2));
+
+  format %{ "vsubsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src1 (LoadD src2)));
+
+  format %{ "vsubsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src con));
+
+  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst src));
+
+  format %{ "mulss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst (LoadF src)));
+
+  format %{ "mulss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst con));
+  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src1 src2));
+
+  format %{ "vmulss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src1 (LoadF src2)));
+
+  format %{ "vmulss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src con));
+
+  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst src));
+
+  format %{ "mulsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst (LoadD src)));
+
+  format %{ "mulsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst con));
+  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src1 src2));
+
+  format %{ "vmulsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src1 (LoadD src2)));
+
+  format %{ "vmulsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src con));
+
+  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst src));
+
+  format %{ "divss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst (LoadF src)));
+
+  format %{ "divss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst con));
+  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src1 src2));
+
+  format %{ "vdivss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src1 (LoadF src2)));
+
+  format %{ "vdivss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src con));
+
+  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst src));
+
+  format %{ "divsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst (LoadD src)));
+
+  format %{ "divsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst con));
+  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src1 src2));
+
+  format %{ "vdivsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src1 (LoadD src2)));
+
+  format %{ "vdivsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src con));
+
+  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct absF_reg(regF dst) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AbsF dst));
+  ins_cost(150);
+  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsF_reg(regF dst, regF src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct absD_reg(regD dst) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AbsD dst));
+  ins_cost(150);
+  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsD_reg(regD dst, regD src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct negF_reg(regF dst) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (NegF dst));
+  ins_cost(150);
+  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vnegF_reg(regF dst, regF src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (NegF src));
+  ins_cost(150);
+  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
+  ins_encode %{
+    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct negD_reg(regD dst) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (NegD dst));
+  ins_cost(150);
+  format %{ "xorpd   $dst, [0x8000000000000000]\t"
+            "# neg double by sign flipping" %}
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vnegD_reg(regD dst, regD src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (NegD src));
+  ins_cost(150);
+  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
+            "# neg double by sign flipping" %}
+  ins_encode %{
+    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_reg(regF dst, regF src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  format %{ "sqrtss  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_mem(regF dst, memory src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
+
+  format %{ "sqrtss  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_imm(regF dst, immF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
+  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_reg(regD dst, regD src) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD src));
+
+  format %{ "sqrtsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_mem(regD dst, memory src) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD (LoadD src)));
+
+  format %{ "sqrtsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_imm(regD dst, immD con) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD con));
+  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Jul 05 18:00:12 2017 +0200
@@ -281,7 +281,7 @@
 }
 
 static int preserve_SP_size() {
-  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
+  return 2;  // op, rm(reg/reg)
 }
 
 // !!!!! Special hack to get all type of calls to specify the byte offset
@@ -495,14 +495,34 @@
   }
 }
 
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    MacroAssembler _masm(&cbuf);
-
-    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
-  }
+void emit_cmpfp_fixup(MacroAssembler& _masm) {
+  Label exit;
+  __ jccb(Assembler::noParity, exit);
+  __ pushf();
+  //
+  // comiss/ucomiss instructions set ZF,PF,CF flags and
+  // zero OF,AF,SF for NaN values.
+  // Fixup flags by zeroing ZF,PF so that compare of NaN
+  // values returns 'less than' result (CF is set).
+  // Leave the rest of flags unchanged.
+  //
+  //    7 6 5 4 3 2 1 0
+  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
+  //    0 0 1 0 1 0 1 1   (0x2B)
+  //
+  __ andl(Address(rsp, 0), 0xffffff2b);
+  __ popf();
+  __ bind(exit);
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+  Label done;
+  __ movl(dst, -1);
+  __ jcc(Assembler::parity, done);
+  __ jcc(Assembler::below, done);
+  __ setb(Assembler::notEqual, dst);
+  __ movzbl(dst, dst);
+  __ bind(done);
 }
 
 
@@ -792,92 +812,88 @@
 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
-  if( cbuf ) {
-    if( reg_lo+1 == reg_hi ) { // double move?
-      if( is_load && !UseXmmLoadAndClearUpper )
-        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
-      else
-        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    if (reg_lo+1 == reg_hi) { // double move?
+      if (is_load) {
+        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
+      } else {
+        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
+      }
     } else {
-      emit_opcode(*cbuf, 0xF3 );
+      if (is_load) {
+        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
+      } else {
+        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
+      }
     }
-    emit_opcode(*cbuf, 0x0F );
-    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
-      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
-    else
-      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
-    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
 #ifndef PRODUCT
-  } else if( !do_size ) {
-    if( size != 0 ) st->print("\n\t");
-    if( reg_lo+1 == reg_hi ) { // double move?
-      if( is_load ) st->print("%s %s,[ESP + #%d]",
-                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
-                               Matcher::regName[reg_lo], offset);
-      else          st->print("MOVSD  [ESP + #%d],%s",
-                               offset, Matcher::regName[reg_lo]);
+  } else if (!do_size) {
+    if (size != 0) st->print("\n\t");
+    if (reg_lo+1 == reg_hi) { // double move?
+      if (is_load) st->print("%s %s,[ESP + #%d]",
+                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
+                              Matcher::regName[reg_lo], offset);
+      else         st->print("MOVSD  [ESP + #%d],%s",
+                              offset, Matcher::regName[reg_lo]);
     } else {
-      if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
-                               Matcher::regName[reg_lo], offset);
-      else          st->print("MOVSS  [ESP + #%d],%s",
-                               offset, Matcher::regName[reg_lo]);
+      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
+                              Matcher::regName[reg_lo], offset);
+      else         st->print("MOVSS  [ESP + #%d],%s",
+                              offset, Matcher::regName[reg_lo]);
     }
 #endif
   }
   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
   return size+5+offset_size;
 }
 
 
 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
-  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
-    if( cbuf ) {
-      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
-        emit_opcode(*cbuf, 0x66 );
-      }
-      emit_opcode(*cbuf, 0x0F );
-      emit_opcode(*cbuf, 0x28 );
-      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
+      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+                as_XMMRegister(Matcher::_regEncode[src_lo]));
+    } else {
+      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+                as_XMMRegister(Matcher::_regEncode[src_lo]));
+    }
 #ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
+  } else if (!do_size) {
+    if (size != 0) st->print("\n\t");
+    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
+      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       } else {
         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       }
-#endif
-    }
-    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
-  } else {
-    if( cbuf ) {
-      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
-      emit_opcode(*cbuf, 0x0F );
-      emit_opcode(*cbuf, 0x10 );
-      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
+    } else {
       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       } else {
         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       }
+    }
 #endif
-    }
-    return size+4;
   }
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+  // Only MOVAPS SSE prefix uses 1 byte.
+  int sz = 4;
+  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
+      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
+  return size + sz;
 }
 
 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
   if (cbuf) {
-    emit_opcode(*cbuf, 0x66);
-    emit_opcode(*cbuf, 0x0F);
-    emit_opcode(*cbuf, 0x6E);
-    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+    MacroAssembler _masm(cbuf);
+    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+             as_Register(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
   } else if (!do_size) {
     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -891,10 +907,9 @@
                                  int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
   if (cbuf) {
-    emit_opcode(*cbuf, 0x66);
-    emit_opcode(*cbuf, 0x0F);
-    emit_opcode(*cbuf, 0x7E);
-    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+    MacroAssembler _masm(cbuf);
+    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
+             as_XMMRegister(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
   } else if (!do_size) {
     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -1760,7 +1775,7 @@
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
   %}
 
-  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
+  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
     emit_d8(cbuf, op >> 8 );
     emit_d8(cbuf, op & 255);
@@ -1931,11 +1946,6 @@
 
   %}
 
-  enc_class Xor_Reg (eRegI dst) %{
-    emit_opcode(cbuf, 0x33);
-    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
-  %}
-
 //   Following encoding is no longer used, but may be restored if calling
 //   convention changes significantly.
 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
@@ -2013,64 +2023,6 @@
   %}
 
 
-  enc_class MovI2X_reg(regX dst, eRegI src) %{
-    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
-    emit_opcode(cbuf, 0x0F );
-    emit_opcode(cbuf, 0x6E );
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class MovX2I_reg(eRegI dst, regX src) %{
-    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
-    emit_opcode(cbuf, 0x0F );
-    emit_opcode(cbuf, 0x7E );
-    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
-  %}
-
-  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
-    { // MOVD $dst,$src.lo
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-    }
-    { // MOVD $tmp,$src.hi
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
-    }
-    { // PUNPCKLDQ $dst,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x62);
-      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
-     }
-  %}
-
-  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
-    { // MOVD $dst.lo,$src
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
-    }
-    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x70);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
-      emit_d8(cbuf, 0x4E);
-    }
-    { // MOVD $dst.hi,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
-    }
-  %}
-
-
   // Encode a reg-reg copy.  If it is useless, then empty encoding.
   enc_class enc_Copy( eRegI dst, eRegI src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
@@ -2080,11 +2032,6 @@
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
-    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
-  %}
-
   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
@@ -2116,14 +2063,14 @@
     $$$emit32$src$$constant;
   %}
 
-  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
+  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
     emit_d32(cbuf, jf_as_bits);
   %}
 
-  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
+  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
@@ -2336,7 +2283,7 @@
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class enc_FP_store(memory mem, regD src) %{
+  enc_class enc_FPR_store(memory mem, regDPR src) %{
     // If src is FPR1, we can just FST to store it.
     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
     int reg_encoding = 0x2; // Just store
@@ -2485,7 +2432,7 @@
 
   // ----------------- Encodings for floating point unit -----------------
   // May leave result in FPU-TOS or FPU reg depending on opcodes
-  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
+  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
     $$$emit8$primary;
     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
   %}
@@ -2497,17 +2444,17 @@
   %}
 
   // !!!!! equivalent to Pop_Reg_F
-  enc_class Pop_Reg_D( regD dst ) %{
+  enc_class Pop_Reg_DPR( regDPR dst ) %{
     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     emit_d8( cbuf, 0xD8+$dst$$reg );
   %}
 
-  enc_class Push_Reg_D( regD dst ) %{
+  enc_class Push_Reg_DPR( regDPR dst ) %{
     emit_opcode( cbuf, 0xD9 );
     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
   %}
 
-  enc_class strictfp_bias1( regD dst ) %{
+  enc_class strictfp_bias1( regDPR dst ) %{
     emit_opcode( cbuf, 0xDB );           // FLD m80real
     emit_opcode( cbuf, 0x2D );
     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
@@ -2515,7 +2462,7 @@
     emit_opcode( cbuf, 0xC8+$dst$$reg );
   %}
 
-  enc_class strictfp_bias2( regD dst ) %{
+  enc_class strictfp_bias2( regDPR dst ) %{
     emit_opcode( cbuf, 0xDB );           // FLD m80real
     emit_opcode( cbuf, 0x2D );
     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
@@ -2541,39 +2488,29 @@
     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
   %}
 
-  // Push the float in stackSlot 'src' onto FP-stack
-  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
-    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
-  %}
-
-  // Push the double in stackSlot 'src' onto FP-stack
-  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
-    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
-  %}
-
   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
+  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
   %}
 
   // Same as Pop_Mem_F except for opcode
   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
+  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
   %}
 
-  enc_class Pop_Reg_F( regF dst ) %{
+  enc_class Pop_Reg_FPR( regFPR dst ) %{
     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     emit_d8( cbuf, 0xD8+$dst$$reg );
   %}
 
-  enc_class Push_Reg_F( regF dst ) %{
+  enc_class Push_Reg_FPR( regFPR dst ) %{
     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
     emit_d8( cbuf, 0xC0-1+$dst$$reg );
   %}
 
   // Push FPU's float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
+  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
@@ -2584,7 +2521,7 @@
   %}
 
   // Push FPU's double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
+  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
@@ -2595,7 +2532,7 @@
   %}
 
   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
-  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
+  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
     int pop = 0xD0 - 1; // -1 since we skip FLD
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
@@ -2607,16 +2544,7 @@
   %}
 
 
-  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
-    MacroAssembler masm(&cbuf);
-    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
-    masm.fmul(   $src2$$reg+0);   // value at TOS
-    masm.fadd(   $src$$reg+0);    // value at TOS
-    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
-  %}
-
-
-  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
+  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
     // load dst in FPR0
     emit_opcode( cbuf, 0xD9 );
     emit_d8( cbuf, 0xC0-1+$dst$$reg );
@@ -2634,116 +2562,59 @@
     }
   %}
 
-  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-  %}
-
-  enc_class Push_ModX_encoding( regX src0, regX src1) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-  %}
-
-  enc_class Push_ResultXD(regXD dst) %{
-    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
-
-    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x08);
-  %}
-
-  enc_class Push_ResultX(regX dst, immI d8) %{
-    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x10 );
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,$d8$$constant);
-  %}
-
-  enc_class Push_SrcXD(regXD src) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+  enc_class Push_ModD_encoding(regD src0, regD src1) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+  %}
+
+  enc_class Push_ModF_encoding(regF src0, regF src1) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+  %}
+
+  enc_class Push_ResultD(regD dst) %{
+    MacroAssembler _masm(&cbuf);
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+
+  enc_class Push_ResultF(regF dst, immI d8) %{
+    MacroAssembler _masm(&cbuf);
+    __ fstp_s(Address(rsp, 0));
+    __ movflt($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, $d8$$constant);
+  %}
+
+  enc_class Push_SrcD(regD src) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
   %}
 
   enc_class push_stack_temp_qword() %{
-    emit_opcode(cbuf,0x83);     // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8    (cbuf,0x08);
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
   %}
 
   enc_class pop_stack_temp_qword() %{
-    emit_opcode(cbuf,0x83);     // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8    (cbuf,0x08);
-  %}
-
-  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+    MacroAssembler _masm(&cbuf);
+    __ addptr(rsp, 8);
+  %}
+
+  enc_class push_xmm_to_fpr1(regD src) %{
+    MacroAssembler _masm(&cbuf);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
   %}
 
   // Compute X^Y using Intel's fast hardware instructions, if possible.
@@ -2785,10 +2656,7 @@
     encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
   %}
 
-//   enc_class Pop_Reg_Mod_D( regD dst, regD src)
-//   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
-
-  enc_class Push_Result_Mod_D( regD src) %{
+  enc_class Push_Result_Mod_DPR( regDPR src) %{
     if ($src$$reg != FPR1L_enc) {
       // fincstp
       emit_opcode (cbuf, 0xD9);
@@ -2817,7 +2685,7 @@
     emit_opcode( cbuf, 0x05 );
   %}
 
-  enc_class emitModD() %{
+  enc_class emitModDPR() %{
     // fprem must be iterative
     // :: loop
     // fprem
@@ -2922,24 +2790,6 @@
   %}
 
 
-  // XMM version of CmpF_Result. Because the XMM compare
-  // instructions set the EFLAGS directly. It becomes simpler than
-  // the float version above.
-  enc_class CmpX_Result(eRegI dst) %{
-    MacroAssembler _masm(&cbuf);
-    Label nan, inc, done;
-
-    __ jccb(Assembler::parity, nan);
-    __ jccb(Assembler::equal,  done);
-    __ jccb(Assembler::above,  inc);
-    __ bind(nan);
-    __ decrement(as_Register($dst$$reg)); // NO L qqq
-    __ jmpb(done);
-    __ bind(inc);
-    __ increment(as_Register($dst$$reg)); // NO L qqq
-    __ bind(done);
-  %}
-
   // Compare the longs and set flags
   // BROKEN!  Do Not use as-is
   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
@@ -3162,48 +3012,6 @@
     emit_d8    (cbuf,0 );
   %}
 
-  enc_class movq_ld(regXD dst, memory mem) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-
-  enc_class movq_st(memory mem, regXD src) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-
-  enc_class pshufd_8x8(regX dst, regX src) %{
-    MacroAssembler _masm(&cbuf);
-
-    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
-    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
-  %}
-
-  enc_class pshufd_4x16(regX dst, regX src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
-  %}
-
-  enc_class pshufd(regXD dst, regXD src, int mode) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
-  %}
-
-  enc_class pxor(regXD dst, regXD src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
-  %}
-
-  enc_class mov_i2x(regXD dst, eRegI src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
-  %}
-
 
   // Because the transitions from emitted code to the runtime
   // monitorenter/exit helper stubs are so slow it's critical that
@@ -3757,7 +3565,7 @@
   // 'zero', store the darned double down as an int, and reset the
   // rounding mode to 'nearest'.  The hardware throws an exception which
   // patches up the correct value directly to the stack.
-  enc_class D2I_encoding( regD src ) %{
+  enc_class DPR2I_encoding( regDPR src ) %{
     // Flip to round-to-zero mode.  We attempted to allow invalid-op
     // exceptions here, so that a NAN or other corner-case value will
     // thrown an exception (but normal values get converted at full speed).
@@ -3800,7 +3608,7 @@
     // Carry on here...
   %}
 
-  enc_class D2L_encoding( regD src ) %{
+  enc_class DPR2L_encoding( regDPR src ) %{
     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
     emit_opcode(cbuf,0x2D);
     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
@@ -3842,294 +3650,27 @@
     // Carry on here...
   %}
 
-  enc_class X2L_encoding( regX src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as a long, popping the FPU stack
-    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
-    emit_opcode(cbuf,0x3C);
-    emit_d8(cbuf,0x24);
-
-    // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
-      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
-      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);      // POP EAX
-
-    emit_opcode(cbuf,0x5A);      // POP EDX
-
-    emit_opcode(cbuf,0x81);      // CMP EDX,imm
-    emit_d8    (cbuf,0xFA);      // rdx
-    emit_d32   (cbuf,0x80000000);//         0x80000000
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13+4);    // Size of slow_call
-
-    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
-    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13);      // Size of slow_call
-
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);      // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    // Carry on here...
-  %}
-
-  enc_class XD2L_encoding( regXD src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as a long, popping the FPU stack
-    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
-    emit_opcode(cbuf,0x3C);
-    emit_d8(cbuf,0x24);
-
-    // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
-      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
-      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);      // POP EAX
-
-    emit_opcode(cbuf,0x5A);      // POP EDX
-
-    emit_opcode(cbuf,0x81);      // CMP EDX,imm
-    emit_d8    (cbuf,0xFA);      // rdx
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13+4);    // Size of slow_call
-
-    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
-    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13);      // Size of slow_call
-
-    // Push src onto stack slow-path
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);      // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x08);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);      // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    // Carry on here...
-  %}
-
-  enc_class D2X_encoding( regX dst, regD src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-    int pop = 0x02;
-    if ($src$$reg != FPR1L_enc) {
-      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
-      pop = 0x03;
-    }
-    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]
-
-    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x10 );
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);            // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-    // Carry on here...
-  %}
-
-  enc_class FX2I_encoding( regX src, eRegI dst ) %{
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-
-    // Compare the result to see if we need to go to the slow path
-    emit_opcode(cbuf,0x81);       // CMP dst,imm
-    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-
-    emit_opcode(cbuf,0x75);       // JNE around_slow_call
-    emit_d8    (cbuf,0x13);       // Size of slow_call
-    // Store xmm to a temp memory
-    // location and push it onto stack.
-
-    emit_opcode(cbuf,0x83);  // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
-    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSS [ESP], xmm
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-
-    // Carry on here...
-  %}
-
-  enc_class X2D_encoding( regD dst, regX src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);     // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);     // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-
-    // Carry on here...
-  %}
-
-  enc_class AbsXF_encoding(regX dst) %{
-    address signmask_address=(address)float_signmask_pool;
-    // andpd:\tANDPS  $dst,[signconst]
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class AbsXD_encoding(regXD dst) %{
-    address signmask_address=(address)double_signmask_pool;
-    // andpd:\tANDPD  $dst,[signconst]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class NegXF_encoding(regX dst) %{
-    address signmask_address=(address)float_signflip_pool;
-    // andpd:\tXORPS  $dst,[signconst]
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class NegXD_encoding(regXD dst) %{
-    address signmask_address=(address)double_signflip_pool;
-    // andpd:\tXORPD  $dst,[signconst]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class FMul_ST_reg( eRegF src1 ) %{
+  enc_class FMul_ST_reg( eRegFPR src1 ) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FMUL   ST,$src  /* D8 C8+i */
     emit_opcode(cbuf, 0xD8);
     emit_opcode(cbuf, 0xC8 + $src1$$reg);
   %}
 
-  enc_class FAdd_ST_reg( eRegF src2 ) %{
+  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
     // FADDP  ST,src2  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
     emit_opcode(cbuf, 0xC0 + $src2$$reg);
     //could use FADDP  src2,fpST  /* DE C0+i */
   %}
 
-  enc_class FAddP_reg_ST( eRegF src2 ) %{
+  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
     // FADDP  src2,ST  /* DE C0+i */
     emit_opcode(cbuf, 0xDE);
     emit_opcode(cbuf, 0xC0 + $src2$$reg);
   %}
 
-  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
+  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
     // Operand has been loaded into fp ST (stack top)
       // FSUB   ST,$src1
       emit_opcode(cbuf, 0xD8);
@@ -4140,7 +3681,7 @@
       emit_opcode(cbuf, 0xF0 + $src2$$reg);
   %}
 
-  enc_class MulFAddF (eRegF src1, eRegF src2) %{
+  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
@@ -4152,7 +3693,7 @@
   %}
 
 
-  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
+  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
@@ -4176,66 +3717,6 @@
     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
   %}
 
-  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    { // MOVSD $dst,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $dst$$base;
-      int index    = $dst$$index;
-      int scale    = $dst$$scale;
-      int displace = $dst$$disp;
-      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
-  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    { // MOVD $dst.lo,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
-    }
-    { // PSRLQ $tmp,32
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x73);
-      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
-      emit_d8(cbuf, 0x20);
-    }
-    { // MOVD $dst.hi,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
-    }
-  %}
-
   // Volatile Store Long.  Must be atomic, so move it into
   // the FP TOS and then do a 64-bit FIST.  Has to probe the
   // target address before the store (for null-ptr checks)
@@ -4253,66 +3734,6 @@
     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $src$$base;
-      int index    = $src$$index;
-      int scale    = $src$$scale;
-      int displace = $src$$disp;
-      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
-    { // MOVSD $mem,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
-  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
-    { // MOVD $tmp,$src.lo
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
-    }
-    { // MOVD $tmp2,$src.hi
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
-    }
-    { // PUNPCKLDQ $tmp,$tmp2
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x62);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
-    }
-    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
-    { // MOVSD $mem,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
   // Safepoint Poll.  This polls the safepoint page, and causes an
   // exception if it is not readable. Unfortunately, it kills the condition code
   // in the process
@@ -4705,7 +4126,7 @@
 %}
 
 //Double Immediate zero
-operand immD0() %{
+operand immDPR0() %{
   // Do additional (and counter-intuitive) test against NaN to work around VC++
   // bug that generates code such that NaNs compare equal to 0.0
   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
@@ -4717,7 +4138,7 @@
 %}
 
 // Double Immediate one
-operand immD1() %{
+operand immDPR1() %{
   predicate( UseSSE<=1 && n->getd() == 1.0 );
   match(ConD);
 
@@ -4727,7 +4148,7 @@
 %}
 
 // Double Immediate
-operand immD() %{
+operand immDPR() %{
   predicate(UseSSE<=1);
   match(ConD);
 
@@ -4736,7 +4157,7 @@
   interface(CONST_INTER);
 %}
 
-operand immXD() %{
+operand immD() %{
   predicate(UseSSE>=2);
   match(ConD);
 
@@ -4746,7 +4167,7 @@
 %}
 
 // Double Immediate zero
-operand immXD0() %{
+operand immD0() %{
   // Do additional (and counter-intuitive) test against NaN to work around VC++
   // bug that generates code such that NaNs compare equal to 0.0 AND do not
   // compare equal to -0.0.
@@ -4758,7 +4179,7 @@
 %}
 
 // Float Immediate zero
-operand immF0() %{
+operand immFPR0() %{
   predicate(UseSSE == 0 && n->getf() == 0.0F);
   match(ConF);
 
@@ -4768,7 +4189,7 @@
 %}
 
 // Float Immediate one
-operand immF1() %{
+operand immFPR1() %{
   predicate(UseSSE == 0 && n->getf() == 1.0F);
   match(ConF);
 
@@ -4778,17 +4199,17 @@
 %}
 
 // Float Immediate
+operand immFPR() %{
+  predicate( UseSSE == 0 );
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
 operand immF() %{
-  predicate( UseSSE == 0 );
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate
-operand immXF() %{
   predicate(UseSSE >= 1);
   match(ConF);
 
@@ -4798,7 +4219,7 @@
 %}
 
 // Float Immediate zero.  Zero and not -0.0
-operand immXF0() %{
+operand immF0() %{
   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
   match(ConF);
 
@@ -5174,7 +4595,7 @@
 %}
 
 // Float register operands
-operand regD() %{
+operand regDPR() %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg));
   match(RegD);
@@ -5184,7 +4605,7 @@
   interface(REG_INTER);
 %}
 
-operand regDPR1(regD reg) %{
+operand regDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg0));
   match(reg);
@@ -5192,7 +4613,7 @@
   interface(REG_INTER);
 %}
 
-operand regDPR2(regD reg) %{
+operand regDPR2(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg1));
   match(reg);
@@ -5200,7 +4621,7 @@
   interface(REG_INTER);
 %}
 
-operand regnotDPR1(regD reg) %{
+operand regnotDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_notreg0));
   match(reg);
@@ -5209,18 +4630,18 @@
 %}
 
 // XMM Double register operands
-operand regXD() %{
+operand regD() %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg));
   match(RegD);
-  match(regXD6);
-  match(regXD7);
+  match(regD6);
+  match(regD7);
   format %{ %}
   interface(REG_INTER);
 %}
 
 // XMM6 double register operands
-operand regXD6(regXD reg) %{
+operand regD6(regD reg) %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg6));
   match(reg);
@@ -5229,7 +4650,7 @@
 %}
 
 // XMM7 double register operands
-operand regXD7(regXD reg) %{
+operand regD7(regD reg) %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg7));
   match(reg);
@@ -5238,7 +4659,7 @@
 %}
 
 // Float register operands
-operand regF() %{
+operand regFPR() %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(flt_reg));
   match(RegF);
@@ -5248,7 +4669,7 @@
 %}
 
 // Float register operands
-operand regFPR1(regF reg) %{
+operand regFPR1(regFPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(flt_reg0));
   match(reg);
@@ -5257,7 +4678,7 @@
 %}
 
 // XMM register operands
-operand regX() %{
+operand regF() %{
   predicate( UseSSE>=1 );
   constraint(ALLOC_IN_RC(xmm_reg));
   match(RegF);
@@ -6001,7 +5422,7 @@
 %}
 
 // Conditional move double reg-reg
-pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
+pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -6010,7 +5431,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg(regD dst) %{
+pipe_class fpu_reg(regDPR dst) %{
     instruction_count(2);
     dst    : S3(read);
     DECODE : S0(2);     // any 2 decoders
@@ -6018,7 +5439,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg(regD dst, regD src) %{
+pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
     instruction_count(2);
     dst    : S4(write);
     src    : S3(read);
@@ -6027,7 +5448,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
+pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
     instruction_count(3);
     dst    : S4(write);
     src1   : S3(read);
@@ -6037,7 +5458,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
     instruction_count(4);
     dst    : S4(write);
     src1   : S3(read);
@@ -6048,7 +5469,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
+pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
     instruction_count(4);
     dst    : S4(write);
     src1   : S3(read);
@@ -6061,7 +5482,7 @@
 %}
 
 // Float reg-mem operation
-pipe_class fpu_reg_mem(regD dst, memory mem) %{
+pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
     instruction_count(2);
     dst    : S5(write);
     mem    : S3(read);
@@ -6072,7 +5493,7 @@
 %}
 
 // Float reg-mem operation
-pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
+pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
     instruction_count(3);
     dst    : S5(write);
     src1   : S3(read);
@@ -6084,7 +5505,7 @@
 %}
 
 // Float mem-reg operation
-pipe_class fpu_mem_reg(memory mem, regD src) %{
+pipe_class fpu_mem_reg(memory mem, regDPR src) %{
     instruction_count(2);
     src    : S5(read);
     mem    : S3(read);
@@ -6094,7 +5515,7 @@
     MEM    : S3;        // any mem
 %}
 
-pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
+pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
     instruction_count(3);
     src1   : S3(read);
     src2   : S3(read);
@@ -6105,7 +5526,7 @@
     MEM    : S3;        // any mem
 %}
 
-pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
+pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
     instruction_count(3);
     src1   : S3(read);
     src2   : S3(read);
@@ -6134,7 +5555,7 @@
     MEM    : S3(3);     // any mem
 %}
 
-pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
+pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
     instruction_count(3);
     src1   : S4(read);
     mem    : S4(read);
@@ -6145,7 +5566,7 @@
 %}
 
 // Float load constant
-pipe_class fpu_reg_con(regD dst) %{
+pipe_class fpu_reg_con(regDPR dst) %{
     instruction_count(2);
     dst    : S5(write);
     D0     : S0;        // big decoder only for the load
@@ -6155,7 +5576,7 @@
 %}
 
 // Float load constant
-pipe_class fpu_reg_reg_con(regD dst, regD src) %{
+pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
     instruction_count(3);
     dst    : S5(write);
     src    : S3(read);
@@ -6870,18 +6291,21 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
+instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   match(Set dst (LoadL mem));
   effect(TEMP tmp);
   ins_cost(180);
   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
             "MOVSD  $dst,$tmp" %}
-  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   match(Set dst (LoadL mem));
   effect(TEMP tmp);
@@ -6890,7 +6314,12 @@
             "MOVD   $dst.lo,$tmp\n\t"
             "PSRLQ  $tmp,32\n\t"
             "MOVD   $dst.hi,$tmp" %}
-  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdl($dst$$Register, $tmp$$XMMRegister);
+    __ psrlq($tmp$$XMMRegister, 32);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6929,7 +6358,7 @@
 %}
 
 // Load Double
-instruct loadD(regD dst, memory mem) %{
+instruct loadDPR(regDPR dst, memory mem) %{
   predicate(UseSSE<=1);
   match(Set dst (LoadD mem));
 
@@ -6938,42 +6367,48 @@
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Double to XMM
-instruct loadXD(regXD dst, memory mem) %{
+instruct loadD(regD dst, memory mem) %{
   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
   match(Set dst (LoadD mem));
   ins_cost(145);
   format %{ "MOVSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadXD_partial(regXD dst, memory mem) %{
+  ins_encode %{
+    __ movdbl ($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadD_partial(regD dst, memory mem) %{
   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
   match(Set dst (LoadD mem));
   ins_cost(145);
   format %{ "MOVLPD $dst,$mem" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
+  ins_encode %{
+    __ movdbl ($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load to XMM register (single-precision floating point)
 // MOVSS instruction
-instruct loadX(regX dst, memory mem) %{
+instruct loadF(regF dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (LoadF mem));
   ins_cost(145);
   format %{ "MOVSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+  ins_encode %{
+    __ movflt ($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Float
-instruct loadF(regF dst, memory mem) %{
+instruct loadFPR(regFPR dst, memory mem) %{
   predicate(UseSSE==0);
   match(Set dst (LoadF mem));
 
@@ -6982,57 +6417,67 @@
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Aligned Packed Byte to XMM register
-instruct loadA8B(regXD dst, memory mem) %{
+instruct loadA8B(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Short to XMM register
-instruct loadA4S(regXD dst, memory mem) %{
+instruct loadA4S(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Char to XMM register
-instruct loadA4C(regXD dst, memory mem) %{
+instruct loadA4C(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Integer to XMM register
-instruct load2IU(regXD dst, memory mem) %{
+instruct load2IU(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Single to XMM
-instruct loadA2F(regXD dst, memory mem) %{
+instruct loadA2F(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load2F mem));
   ins_cost(145);
   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7139,8 +6584,8 @@
   ins_pipe( ialu_reg_long );
 %}
 
-// The instruction usage is guarded by predicate in operand immF().
-instruct loadConF(regF dst, immF con) %{
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct loadConFPR(regFPR dst, immFPR con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
@@ -7152,8 +6597,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immF0().
-instruct loadConF0(regF dst, immF0 con) %{
+// The instruction usage is guarded by predicate in operand immFPR0().
+instruct loadConFPR0(regFPR dst, immFPR0 con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLDZ   ST\n\t"
@@ -7165,8 +6610,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immF1().
-instruct loadConF1(regF dst, immF1 con) %{
+// The instruction usage is guarded by predicate in operand immFPR1().
+instruct loadConFPR1(regFPR dst, immFPR1 con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLD1   ST\n\t"
@@ -7178,8 +6623,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immXF().
-instruct loadConX(regX dst, immXF con) %{
+// The instruction usage is guarded by predicate in operand immF().
+instruct loadConF(regF dst, immF con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
@@ -7189,8 +6634,8 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immXF0().
-instruct loadConX0(regX dst, immXF0 src) %{
+// The instruction usage is guarded by predicate in operand immF0().
+instruct loadConF0(regF dst, immF0 src) %{
   match(Set dst src);
   ins_cost(100);
   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
@@ -7200,8 +6645,8 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immD().
-instruct loadConD(regD dst, immD con) %{
+// The instruction usage is guarded by predicate in operand immDPR().
+instruct loadConDPR(regDPR dst, immDPR con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7214,8 +6659,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immD0().
-instruct loadConD0(regD dst, immD0 con) %{
+// The instruction usage is guarded by predicate in operand immDPR0().
+instruct loadConDPR0(regDPR dst, immDPR0 con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7228,8 +6673,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immD1().
-instruct loadConD1(regD dst, immD1 con) %{
+// The instruction usage is guarded by predicate in operand immDPR1().
+instruct loadConDPR1(regDPR dst, immDPR1 con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7242,8 +6687,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immXD().
-instruct loadConXD(regXD dst, immXD con) %{
+// The instruction usage is guarded by predicate in operand immD().
+instruct loadConD(regD dst, immD con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
@@ -7253,12 +6698,14 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immXD0().
-instruct loadConXD0(regXD dst, immXD0 src) %{
+// The instruction usage is guarded by predicate in operand immD0().
+instruct loadConD0(regD dst, immD0 src) %{
   match(Set dst src);
   ins_cost(100);
   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7296,7 +6743,7 @@
 %}
 
 // Load Stack Slot
-instruct loadSSF(regF dst, stackSlotF src) %{
+instruct loadSSF(regFPR dst, stackSlotF src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -7304,12 +6751,12 @@
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Stack Slot
-instruct loadSSD(regD dst, stackSlotD src) %{
+instruct loadSSD(regDPR dst, stackSlotD src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -7317,7 +6764,7 @@
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -7552,7 +6999,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
   effect( TEMP tmp, KILL cr );
@@ -7560,12 +7007,15 @@
   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
             "MOVSD  $tmp,$src\n\t"
             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  opcode(0x3B);
-  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
+  ins_encode %{
+    __ cmpl(rax, $mem$$Address);
+    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
+    __ movdbl($mem$$Address, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
   effect( TEMP tmp2 , TEMP tmp, KILL cr );
@@ -7575,8 +7025,13 @@
             "MOVD   $tmp2,$src.hi\n\t"
             "PUNPCKLDQ $tmp,$tmp2\n\t"
             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  opcode(0x3B);
-  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
+  ins_encode %{
+    __ cmpl(rax, $mem$$Address);
+    __ movdl($tmp$$XMMRegister, $src$$Register);
+    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ movdbl($mem$$Address, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7638,32 +7093,38 @@
 %}
 
 // Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regXD src) %{
+instruct storeA8B(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regXD src) %{
+instruct storeA4C(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regXD src) %{
+instruct storeA2I(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7679,98 +7140,116 @@
 %}
 
 // Store Double
-instruct storeD( memory mem, regDPR1 src) %{
+instruct storeDPR( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreD mem src));
 
   ins_cost(100);
   format %{ "FST_D  $mem,$src" %}
   opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store double does rounding on x86
-instruct storeD_rounded( memory mem, regDPR1 src) %{
+instruct storeDPR_rounded( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreD mem (RoundDouble src)));
 
   ins_cost(100);
   format %{ "FST_D  $mem,$src\t# round" %}
   opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store XMM register to memory (double-precision floating points)
 // MOVSD instruction
-instruct storeXD(memory mem, regXD src) %{
+instruct storeD(memory mem, regD src) %{
   predicate(UseSSE>=2);
   match(Set mem (StoreD mem src));
   ins_cost(95);
   format %{ "MOVSD  $mem,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store XMM register to memory (single-precision floating point)
 // MOVSS instruction
-instruct storeX(memory mem, regX src) %{
+instruct storeF(memory mem, regF src) %{
   predicate(UseSSE>=1);
   match(Set mem (StoreF mem src));
   ins_cost(95);
   format %{ "MOVSS  $mem,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regXD src) %{
+instruct storeA2F(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Float
-instruct storeF( memory mem, regFPR1 src) %{
+instruct storeFPR( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set mem (StoreF mem src));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store Float does rounding on x86
-instruct storeF_rounded( memory mem, regFPR1 src) %{
+instruct storeFPR_rounded( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set mem (StoreF mem (RoundFloat src)));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src\t# round" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store Float does rounding on x86
-instruct storeF_Drounded( memory mem, regDPR1 src) %{
+instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreF mem (ConvD2F src)));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src\t# D-round" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store immediate Float value (it is faster than store from FPU register)
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct storeFPR_imm( memory mem, immFPR src) %{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(50);
+  format %{ "MOV    $mem,$src\t# store float" %}
+  opcode(0xC7);               /* C7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Store immediate Float value (it is faster than store from XMM register)
 // The instruction usage is guarded by predicate in operand immF().
 instruct storeF_imm( memory mem, immF src) %{
   match(Set mem (StoreF mem src));
@@ -7782,18 +7261,6 @@
   ins_pipe( ialu_mem_imm );
 %}
 
-// Store immediate Float value (it is faster than store from XMM register)
-// The instruction usage is guarded by predicate in operand immXF().
-instruct storeX_imm( memory mem, immXF src) %{
-  match(Set mem (StoreF mem src));
-
-  ins_cost(50);
-  format %{ "MOV    $mem,$src\t# store float" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
-  ins_pipe( ialu_mem_imm );
-%}
-
 // Store Integer to stack slot
 instruct storeSSI(stackSlotI dst, eRegI src) %{
   match(Set dst src);
@@ -7901,6 +7368,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Move Instructions--------------------------------------------------
 instruct castX2P(eAXRegP dst, eAXRegI src) %{
   match(Set dst (CastX2P src));
@@ -8088,29 +7565,29 @@
 //%}
 
 // Conditional move
-instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
+instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   format %{ "FCMOV$cop $dst,$src\t# double" %}
   opcode(0xDA);
-  ins_encode( enc_cmov_d(cop,src) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_dpr(cop,src) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Conditional move
-instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
+instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   format %{ "FCMOV$cop $dst,$src\t# float" %}
   opcode(0xDA);
-  ins_encode( enc_cmov_d(cop,src) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_dpr(cop,src) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
+instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8118,12 +7595,12 @@
             "MOV    $dst,$src\t# double\n"
       "skip:" %}
   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
+instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8131,12 +7608,12 @@
             "MOV    $dst,$src\t# float\n"
       "skip:" %}
   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // No CMOVE with SSE/SSE2
-instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
+instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8154,7 +7631,7 @@
 %}
 
 // No CMOVE with SSE/SSE2
-instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
+instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8172,7 +7649,7 @@
 %}
 
 // unsigned version
-instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
+instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8189,17 +7666,17 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
+instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovX_regU(cop, cr, dst, src);
+    fcmovF_regU(cop, cr, dst, src);
   %}
 %}
 
 // unsigned version
-instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
+instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8216,12 +7693,12 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regU(cop, cr, dst, src);
+    fcmovD_regU(cop, cr, dst, src);
   %}
 %}
 
@@ -8440,7 +7917,7 @@
 %}
 
 // LoadLong-locked - same as a volatile long load when used with compare-swap
-instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
+instruct loadLLocked(stackSlotL dst, memory mem) %{
   predicate(UseSSE<=1);
   match(Set dst (LoadLLocked mem));
 
@@ -8451,18 +7928,21 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (LoadLLocked mem));
   effect(TEMP tmp);
   ins_cost(180);
   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
             "MOVSD  $dst,$tmp" %}
-  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (LoadLLocked mem));
   effect(TEMP tmp);
@@ -8471,7 +7951,12 @@
             "MOVD   $dst.lo,$tmp\n\t"
             "PSRLQ  $tmp,32\n\t"
             "MOVD   $dst.hi,$tmp" %}
-  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdl($dst$$Register, $tmp$$XMMRegister);
+    __ psrlq($tmp$$XMMRegister, 32);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -10054,7 +9539,7 @@
 // Compare & branch
 
 // P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
   predicate(VM_Version::supports_cmov() && UseSSE <=1);
   match(Set cr (CmpD src1 src2));
   effect(KILL rax);
@@ -10066,26 +9551,26 @@
             "SAHF\n"
      "exit:\tNOP               // avoid branch to branch" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               cmpF_P6_fixup );
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
+instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
   predicate(VM_Version::supports_cmov() && UseSSE <=1);
   match(Set cr (CmpD src1 src2));
   ins_cost(150);
   format %{ "FLD    $src1\n\t"
             "FUCOMIP ST,$src2  // P6 instruction" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2));
   ins_pipe( pipe_slow );
 %}
 
 // Compare & branch
-instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
   predicate(UseSSE<=1);
   match(Set cr (CmpD src1 src2));
   effect(KILL rax);
@@ -10098,138 +9583,140 @@
             "MOV    AH,1\t# unordered treat as LT\n"
     "flags:\tSAHF" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               fpu_flags);
   ins_pipe( pipe_slow );
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 zero));
   effect(KILL cr, KILL rax);
   ins_cost(280);
   format %{ "FTSTD  $dst,$src1" %}
   opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcS, OpcP, PopFPU,
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1
-instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr, KILL rax);
   ins_cost(300);
   format %{ "FCMPD  $dst,$src1,$src2" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
+instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst src));
-  effect(KILL rax);
-  ins_cost(125);
-  format %{ "COMISD $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
+  match(Set cr (CmpD src1 src2));
+  ins_cost(145);
+  format %{ "UCOMISD $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst src));
+  match(Set cr (CmpD src1 src2));
   ins_cost(100);
-  format %{ "COMISD $dst,$src" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  format %{ "UCOMISD $src1,$src2" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
+instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst (LoadD src)));
-  effect(KILL rax);
+  match(Set cr (CmpD src1 (LoadD src2)));
   ins_cost(145);
-  format %{ "COMISD $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
+  format %{ "UCOMISD $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst (LoadD src)));
+  match(Set cr (CmpD src1 (LoadD src2)));
   ins_cost(100);
-  format %{ "COMISD $dst,$src" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
+  format %{ "UCOMISD $src1,$src2" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM
-instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
+instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
   predicate(UseSSE>=2);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr);
   ins_cost(255);
-  format %{ "XOR    $dst,$dst\n"
-          "\tCOMISD $src1,$src2\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
-             CmpX_Result(dst));
+  format %{ "UCOMISD $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM and memory
-instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
+instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
   predicate(UseSSE>=2);
-  match(Set dst (CmpD3 src1 (LoadD mem)));
+  match(Set dst (CmpD3 src1 (LoadD src2)));
   effect(KILL cr);
   ins_cost(275);
-  format %{ "COMISD $src1,$mem\n"
-          "\tMOV    $dst,0\t\t# do not blow flags\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
-             LdImmI(dst,0x0), CmpX_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct subD_reg(regD dst, regD src) %{
+  format %{ "UCOMISD $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct subDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE <=1);
   match(Set dst (SubD dst src));
 
@@ -10237,12 +9724,12 @@
             "DSUBp  $dst,ST" %}
   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate (UseSSE <=1);
   match(Set dst (RoundDouble (SubD src1 src2)));
   ins_cost(250);
@@ -10251,13 +9738,13 @@
             "DSUB   ST,$src1\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x5);
-  ins_encode( Push_Reg_D(src2),
-              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src2),
+              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct subD_reg_mem(regD dst, memory src) %{
+instruct subDPR_reg_mem(regDPR dst, memory src) %{
   predicate (UseSSE <=1);
   match(Set dst (SubD dst (LoadD src)));
   ins_cost(150);
@@ -10270,7 +9757,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct absD_reg(regDPR1 dst, regDPR1 src) %{
+instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (AbsD src));
   ins_cost(100);
@@ -10280,15 +9767,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct absXD_reg( regXD dst ) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AbsD dst));
-  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
-  ins_encode( AbsXD_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct negD_reg(regDPR1 dst, regDPR1 src) %{
+instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set dst (NegD src));
   ins_cost(100);
@@ -10298,18 +9777,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct negXD_reg( regXD dst ) %{
-  predicate(UseSSE>=2);
-  match(Set dst (NegD dst));
-  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
-  ins_encode %{
-     __ xorpd($dst$$XMMRegister,
-              ExternalAddress((address)double_signflip_pool));
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct addD_reg(regD dst, regD src) %{
+instruct addDPR_reg(regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst src));
   format %{ "FLD    $src\n\t"
@@ -10317,13 +9785,13 @@
   size(4);
   ins_cost(150);
   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 
-instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate(UseSSE<=1);
   match(Set dst (RoundDouble (AddD src1 src2)));
   ins_cost(250);
@@ -10332,13 +9800,13 @@
             "DADD   ST,$src1\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
-  ins_encode( Push_Reg_D(src2),
-              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src2),
+              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct addD_reg_mem(regD dst, memory src) %{
+instruct addDPR_reg_mem(regDPR dst, memory src) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst (LoadD src)));
   ins_cost(150);
@@ -10352,7 +9820,7 @@
 %}
 
 // add-to-memory
-instruct addD_mem_reg(memory dst, regD src) %{
+instruct addDPR_mem_reg(memory dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
   ins_cost(150);
@@ -10368,7 +9836,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct addD_reg_imm1(regD dst, immD1 con) %{
+instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst con));
   ins_cost(125);
@@ -10381,7 +9849,7 @@
   ins_pipe(fpu_reg);
 %}
 
-instruct addD_reg_imm(regD dst, immD con) %{
+instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (AddD dst con));
   ins_cost(200);
@@ -10394,7 +9862,7 @@
   ins_pipe(fpu_reg_mem);
 %}
 
-instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
+instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (RoundDouble (AddD src con)));
   ins_cost(200);
@@ -10409,124 +9877,14 @@
   ins_pipe(fpu_mem_reg_con);
 %}
 
-// Add two double precision floating point values in xmm
-instruct addXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst src));
-  format %{ "ADDSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct addXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst con));
-  format %{ "ADDSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ addsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst (LoadD mem)));
-  format %{ "ADDSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Sub two double precision floating point values in xmm
-instruct subXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst src));
-  format %{ "SUBSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct subXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst con));
-  format %{ "SUBSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ subsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst (LoadD mem)));
-  format %{ "SUBSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Mul two double precision floating point values in xmm
-instruct mulXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst src));
-  format %{ "MULSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct mulXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst con));
-  format %{ "MULSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ mulsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst (LoadD mem)));
-  format %{ "MULSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Div two double precision floating point values in xmm
-instruct divXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst src));
-  format %{ "DIVSD  $dst,$src" %}
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct divXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst con));
-  format %{ "DIVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ divsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst (LoadD mem)));
-  format %{ "DIVSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct mulD_reg(regD dst, regD src) %{
+instruct mulDPR_reg(regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (MulD dst src));
   format %{ "FLD    $src\n\t"
             "DMULp  $dst,ST" %}
   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
@@ -10539,7 +9897,7 @@
 // multiply scaled arg1 by arg2
 // rescale product by 2^(15360)
 //
-instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
   match(Set dst (MulD dst src));
   ins_cost(1);   // Select this instruction for all strict FP double multiplies
@@ -10552,13 +9910,13 @@
             "DMULp  $dst,ST\n\t" %}
   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
   ins_encode( strictfp_bias1(dst),
-              Push_Reg_D(src),
+              Push_Reg_DPR(src),
               OpcP, RegOpc(dst),
               strictfp_bias2(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct mulD_reg_imm(regD dst, immD con) %{
+instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (MulD dst con));
   ins_cost(200);
@@ -10572,7 +9930,7 @@
 %}
 
 
-instruct mulD_reg_mem(regD dst, memory src) %{
+instruct mulDPR_reg_mem(regDPR dst, memory src) %{
   predicate( UseSSE<=1 );
   match(Set dst (MulD dst (LoadD src)));
   ins_cost(200);
@@ -10586,7 +9944,7 @@
 
 //
 // Cisc-alternate to reg-reg multiply
-instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
+instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
   predicate( UseSSE<=1 );
   match(Set dst (MulD src (LoadD mem)));
   ins_cost(250);
@@ -10595,17 +9953,17 @@
             "FSTP_D $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
-              OpcReg_F(src),
-              Pop_Reg_D(dst) );
+              OpcReg_FPR(src),
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
 
-// MACRO3 -- addD a mulD
+// MACRO3 -- addDPR a mulDPR
 // This instruction is a '2-address' instruction in that the result goes
 // back to src2.  This eliminates a move from the macro; possibly the
 // register allocator will have to add it back (and maybe not).
-instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
+instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
   predicate( UseSSE<=1 );
   match(Set src2 (AddD (MulD src0 src1) src2));
   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
@@ -10613,29 +9971,29 @@
             "DADDp  $src2,ST" %}
   ins_cost(250);
   opcode(0xDD); /* LoadD DD /0 */
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               FAddP_reg_ST(src2) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
-// MACRO3 -- subD a mulD
-instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
+// MACRO3 -- subDPR a mulDPR
+instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
   predicate( UseSSE<=1 );
   match(Set src2 (SubD (MulD src0 src1) src2));
   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
             "DMUL   ST,$src1\n\t"
             "DSUBRp $src2,ST" %}
   ins_cost(250);
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               Opcode(0xDE), Opc_plus(0xE0,src2));
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
-instruct divD_reg(regD dst, regD src) %{
+instruct divDPR_reg(regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 );
   match(Set dst (DivD dst src));
 
@@ -10643,7 +10001,7 @@
             "FDIVp  $dst,ST" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
@@ -10656,7 +10014,7 @@
 // divide scaled dividend by divisor
 // rescale quotient by 2^(15360)
 //
-instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (DivD dst src));
   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
@@ -10670,13 +10028,13 @@
             "DMULp  $dst,ST\n\t" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
   ins_encode( strictfp_bias1(dst),
-              Push_Reg_D(src),
+              Push_Reg_DPR(src),
               OpcP, RegOpc(dst),
               strictfp_bias2(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
   match(Set dst (RoundDouble (DivD src1 src2)));
 
@@ -10684,27 +10042,27 @@
             "FDIV   ST,$src2\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
-  ins_encode( Push_Reg_D(src1),
-              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src1),
+              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
+instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (ModD dst src));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "DMOD   $dst,$src" %}
   ins_cost(250);
-  ins_encode(Push_Reg_Mod_D(dst, src),
-              emitModD(),
-              Push_Result_Mod_D(src),
-              Pop_Reg_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
+  ins_encode(Push_Reg_Mod_DPR(dst, src),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src),
+              Pop_Reg_DPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE>=2);
   match(Set dst (ModD src0 src1));
   effect(KILL rax, KILL cr);
@@ -10725,11 +10083,11 @@
           "\tFSTP   ST0\t # Restore FPU Stack"
     %}
   ins_cost(250);
-  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
-  ins_pipe( pipe_slow );
-%}
-
-instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
+  ins_pipe( pipe_slow );
+%}
+
+instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (SinD src));
   ins_cost(1800);
@@ -10739,18 +10097,18 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
+instruct sinD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (SinD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   ins_cost(1800);
   format %{ "DSIN   $dst" %}
   opcode(0xD9, 0xFE);
-  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (CosD src));
   ins_cost(1800);
@@ -10760,18 +10118,18 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
+instruct cosD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (CosD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   ins_cost(1800);
   format %{ "DCOS   $dst" %}
   opcode(0xD9, 0xFF);
-  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst(TanD src));
   format %{ "DTAN   $dst" %}
@@ -10780,50 +10138,50 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
+instruct tanD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst(TanD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   format %{ "DTAN   $dst" %}
-  ins_encode( Push_SrcXD(dst),
+  ins_encode( Push_SrcD(dst),
               Opcode(0xD9), Opcode(0xF2),    // fptan
               Opcode(0xDD), Opcode(0xD8),   // fstp st
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct atanD_reg(regD dst, regD src) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct atanDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE<=1);
   match(Set dst(AtanD dst src));
   format %{ "DATA   $dst,$src" %}
   opcode(0xD9, 0xF3);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, OpcS, RegOpc(dst) );
   ins_pipe( pipe_slow );
 %}
 
-instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst(AtanD dst src));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   format %{ "DATA   $dst,$src" %}
   opcode(0xD9, 0xF3);
-  ins_encode( Push_SrcXD(src),
-              OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtD_reg(regD dst, regD src) %{
+  ins_encode( Push_SrcD(src),
+              OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE<=1);
   match(Set dst (SqrtD src));
   format %{ "DSQRT  $dst,$src" %}
   opcode(0xFA, 0xD9);
-  ins_encode( Push_Reg_D(src),
-              OpcS, OpcP, Pop_Reg_D(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+  ins_encode( Push_Reg_DPR(src),
+              OpcS, OpcP, Pop_Reg_DPR(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE<=1);
   match(Set Y (PowD X Y));  // Raise X to the Yth power
   effect(KILL rax, KILL rbx, KILL rcx);
@@ -10852,14 +10210,14 @@
             "ADD    ESP,8"
              %}
   ins_encode( push_stack_temp_qword,
-              Push_Reg_D(X),
+              Push_Reg_DPR(X),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
               pow_exp_core_encoding,
               pop_stack_temp_qword);
   ins_pipe( pipe_slow );
 %}
 
-instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
+instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
   predicate (UseSSE>=2);
   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
@@ -10897,12 +10255,12 @@
               push_xmm_to_fpr1(src0),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
               pow_exp_core_encoding,
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE<=1);
   match(Set dpr1 (ExpD dpr1));
   effect(KILL rax, KILL rbx, KILL rcx);
@@ -10938,7 +10296,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE>=2);
   match(Set dst (ExpD src));
   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
@@ -10969,17 +10327,17 @@
             "MOVSD  $dst,[ESP]\n\t"
             "ADD    ESP,8"
              %}
-  ins_encode( Push_SrcXD(src),
+  ins_encode( Push_SrcD(src),
               Opcode(0xD9), Opcode(0xEA),   // fldl2e
               Opcode(0xDE), Opcode(0xC9),   // fmulp
               pow_exp_core_encoding,
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
-
-instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+
+
+instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   // The source Double operand on FPU stack
   match(Set dst (Log10D src));
@@ -10997,7 +10355,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   effect(KILL cr);
   match(Set dst (Log10D src));
@@ -11007,14 +10365,14 @@
             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
          %}
   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
-              Push_SrcXD(src),
+              Push_SrcD(src),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultXD(dst));
-
-  ins_pipe( pipe_slow );
-%}
-
-instruct logD_reg(regDPR1 dst, regDPR1 src) %{
+              Push_ResultD(dst));
+
+  ins_pipe( pipe_slow );
+%}
+
+instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   // The source Double operand on FPU stack
   match(Set dst (LogD src));
@@ -11032,7 +10390,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   effect(KILL cr);
   // The source and result Double operands in XMM registers
@@ -11043,9 +10401,9 @@
             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
          %}
   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
-              Push_SrcXD(src),
+              Push_SrcD(src),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultXD(dst));
+              Push_ResultD(dst));
   ins_pipe( pipe_slow );
 %}
 
@@ -11066,7 +10424,7 @@
 //   exit:
 
 // P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
   predicate(VM_Version::supports_cmov() && UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   effect(KILL rax);
@@ -11078,27 +10436,27 @@
             "SAHF\n"
      "exit:\tNOP               // avoid branch to branch" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               cmpF_P6_fixup );
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
+instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
   predicate(VM_Version::supports_cmov() && UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   ins_cost(100);
   format %{ "FLD    $src1\n\t"
             "FUCOMIP ST,$src2  // P6 instruction" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2));
   ins_pipe( pipe_slow );
 %}
 
 
 // Compare & branch
-instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
   predicate(UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   effect(KILL rax);
@@ -11111,328 +10469,190 @@
             "MOV    AH,1\t# unordered treat as LT\n"
     "flags:\tSAHF" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               fpu_flags);
   ins_pipe( pipe_slow );
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 zero));
   effect(KILL cr, KILL rax);
   ins_cost(280);
   format %{ "FTSTF  $dst,$src1" %}
   opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcS, OpcP, PopFPU,
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1
-instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr, KILL rax);
   ins_cost(300);
   format %{ "FCMPF  $dst,$src1,$src2" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
+instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst src));
-  effect(KILL rax);
+  match(Set cr (CmpF src1 src2));
   ins_cost(145);
-  format %{ "COMISS $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
+  format %{ "UCOMISS $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst src));
+  match(Set cr (CmpF src1 src2));
   ins_cost(100);
-  format %{ "COMISS $dst,$src" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegReg(dst, src));
+  format %{ "UCOMISS $src1,$src2" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
+instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst (LoadF src)));
-  effect(KILL rax);
+  match(Set cr (CmpF src1 (LoadF src2)));
   ins_cost(165);
-  format %{ "COMISS $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
+  format %{ "UCOMISS $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst (LoadF src)));
+  match(Set cr (CmpF src1 (LoadF src2)));
   ins_cost(100);
-  format %{ "COMISS $dst,$src" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(dst, src));
+  format %{ "UCOMISS $src1,$src2" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM
-instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
+instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
   predicate(UseSSE>=1);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr);
   ins_cost(255);
-  format %{ "XOR    $dst,$dst\n"
-          "\tCOMISS $src1,$src2\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x0F, 0x2F);
-  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
+  format %{ "UCOMISS $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM and memory
-instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
+instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
   predicate(UseSSE>=1);
-  match(Set dst (CmpF3 src1 (LoadF mem)));
+  match(Set dst (CmpF3 src1 (LoadF src2)));
   effect(KILL cr);
   ins_cost(275);
-  format %{ "COMISS $src1,$mem\n"
-          "\tMOV    $dst,0\t\t# do not blow flags\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
+  format %{ "UCOMISS $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Spill to obtain 24-bit precision
-instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (SubF src1 src2));
 
   format %{ "FSUB   $dst,$src1 - $src2" %}
   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct subF_reg(regF dst, regF src) %{
+instruct subFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (SubF dst src));
 
   format %{ "FSUB   $dst,$src" %}
   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 // Spill to obtain 24-bit precision
-instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0); /* D8 C0+i */
-  ins_encode( Push_Reg_F(src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct addF_reg(regF dst, regF src) %{
+instruct addFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF dst src));
 
   format %{ "FLD    $src\n\t"
             "FADDp  $dst,ST" %}
   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-// Add two single precision floating point values in xmm
-instruct addX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst src));
-  format %{ "ADDSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct addX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst con));
-  format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ addss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst (LoadF mem)));
-  format %{ "ADDSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Subtract two single precision floating point values in xmm
-instruct subX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst src));
-  format %{ "SUBSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct subX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst con));
-  format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ subss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst (LoadF mem)));
-  format %{ "SUBSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply two single precision floating point values in xmm
-instruct mulX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst src));
-  format %{ "MULSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct mulX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst con));
-  format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ mulss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst (LoadF mem)));
-  format %{ "MULSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Divide two single precision floating point values in xmm
-instruct divX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst src));
-  format %{ "DIVSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct divX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst con));
-  format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ divss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst (LoadF mem)));
-  format %{ "DIVSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a single precision floating point values in xmm
-instruct sqrtX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-  format %{ "SQRTSS $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
-  format %{ "SQRTSS $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a double precision floating point values in xmm
-instruct sqrtXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SqrtD src));
-  format %{ "SQRTSD $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SqrtD (LoadD mem)));
-  format %{ "SQRTSD $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct absF_reg(regFPR1 dst, regFPR1 src) %{
+instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set dst (AbsF src));
   ins_cost(100);
@@ -11442,15 +10662,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct absX_reg(regX dst ) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AbsF dst));
-  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
-  ins_encode( AbsXF_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct negF_reg(regFPR1 dst, regFPR1 src) %{
+instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set dst (NegF src));
   ins_cost(100);
@@ -11460,17 +10672,9 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct negX_reg( regX dst ) %{
-  predicate(UseSSE>=1);
-  match(Set dst (NegF dst));
-  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
-  ins_encode( NegXF_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
 // Spill to obtain 24-bit precision
-instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -11479,14 +10683,14 @@
             "FSTP_S $dst" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
 // This instruction does not round to 24-bits
-instruct addF_reg_mem(regF dst, memory src) %{
+instruct addFPR_reg_mem(regFPR dst, memory src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF dst (LoadF src)));
 
@@ -11499,21 +10703,21 @@
 
 // // Following two instructions for _222_mpegaudio
 // Spill to obtain 24-bit precision
-instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
+instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 
 // Cisc-spill variant
 // Spill to obtain 24-bit precision
-instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -11522,12 +10726,12 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
@@ -11536,13 +10740,13 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 
 // Spill to obtain 24-bit precision
-instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src con));
   format %{ "FLD    $src\n\t"
@@ -11557,7 +10761,7 @@
 %}
 //
 // This instruction does not round to 24-bits
-instruct addF_reg_imm(regF dst, regF src, immF con) %{
+instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src con));
   format %{ "FLD    $src\n\t"
@@ -11572,7 +10776,7 @@
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11580,14 +10784,14 @@
             "FMUL   $src2\n\t"
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct mulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11595,16 +10799,16 @@
             "FMUL   $src2\n\t"
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1); /* D8 C8+i */
-  ins_encode( Push_Reg_F(src2),
-              OpcReg_F(src1),
-              Pop_Reg_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              OpcReg_FPR(src1),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
 // Spill to obtain 24-bit precision
 // Cisc-alternate to reg-reg multiply
-instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 (LoadF src2)));
 
@@ -11613,27 +10817,27 @@
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
 // This instruction does not round to 24-bits
 // Cisc-alternate to reg-reg multiply
-instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
+instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 (LoadF src2)));
 
   format %{ "FMUL   $dst,$src1,$src2" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Reg_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11642,12 +10846,12 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src con));
 
@@ -11663,7 +10867,7 @@
 %}
 //
 // This instruction does not round to 24-bits
-instruct mulF_reg_imm(regF dst, regF src, immF con) %{
+instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src con));
 
@@ -11680,9 +10884,9 @@
 
 
 //
-// MACRO1 -- subsume unshared load into mulF
+// MACRO1 -- subsume unshared load into mulFPR
 // This instruction does not round to 24-bits
-instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
+instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF (LoadF mem1) src));
 
@@ -11691,36 +10895,36 @@
             "FSTP   $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
-              OpcReg_F(src),
-              Pop_Reg_F(dst) );
+              OpcReg_FPR(src),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 //
-// MACRO2 -- addF a mulF which subsumed an unshared load
+// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
 // This instruction does not round to 24-bits
-instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
+instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
   ins_cost(95);
 
   format %{ "FLD    $mem1     ===MACRO2===\n\t"
-            "FMUL   ST,$src1  subsume mulF left load\n\t"
+            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
             "FADD   ST,$src2\n\t"
             "FSTP   $dst" %}
   opcode(0xD9); /* LoadF D9 /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
               FMul_ST_reg(src1),
               FAdd_ST_reg(src2),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem_reg_reg );
 %}
 
-// MACRO3 -- addF a mulF
+// MACRO3 -- addFPR a mulFPR
 // This instruction does not round to 24-bits.  It is a '2-address'
 // instruction in that the result goes back to src2.  This eliminates
 // a move from the macro; possibly the register allocator will have
 // to add it back (and maybe not).
-instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
+instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set src2 (AddF (MulF src0 src1) src2));
 
@@ -11728,15 +10932,15 @@
             "FMUL   ST,$src1\n\t"
             "FADDP  $src2,ST" %}
   opcode(0xD9); /* LoadF D9 /0 */
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               FAddP_reg_ST(src2) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
-// MACRO4 -- divF subF
+// MACRO4 -- divFPR subFPR
 // This instruction does not round to 24-bits
-instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
+instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (DivF (SubF src2 src1) src3));
 
@@ -11745,67 +10949,67 @@
             "FDIV   ST,$src3\n\t"
             "FSTP  $dst" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_F(src2),
-              subF_divF_encode(src1,src3),
-              Pop_Reg_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              subFPR_divFPR_encode(src1,src3),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_reg_reg );
 %}
 
 // Spill to obtain 24-bit precision
-instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (DivF src1 src2));
 
   format %{ "FDIV   $dst,$src1,$src2" %}
   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct divF_reg(regF dst, regF src) %{
+instruct divFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (DivF dst src));
 
   format %{ "FDIV   $dst,$src" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 
 // Spill to obtain 24-bit precision
-instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ModF src1 src2));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "FMOD   $dst,$src1,$src2" %}
-  ins_encode( Push_Reg_Mod_D(src1, src2),
-              emitModD(),
-              Push_Result_Mod_D(src2),
-              Pop_Mem_F(dst));
+  ins_encode( Push_Reg_Mod_DPR(src1, src2),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src2),
+              Pop_Mem_FPR(dst));
   ins_pipe( pipe_slow );
 %}
 //
 // This instruction does not round to 24-bits
-instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ModF dst src));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "FMOD   $dst,$src" %}
-  ins_encode(Push_Reg_Mod_D(dst, src),
-              emitModD(),
-              Push_Result_Mod_D(src),
-              Pop_Reg_F(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
+  ins_encode(Push_Reg_Mod_DPR(dst, src),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src),
+              Pop_Reg_FPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE>=1);
   match(Set dst (ModF src0 src1));
   effect(KILL rax, KILL cr);
@@ -11825,7 +11029,7 @@
           "\tFSTP   ST0\t # Restore FPU Stack"
     %}
   ins_cost(250);
-  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
+  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
   ins_pipe( pipe_slow );
 %}
 
@@ -11833,26 +11037,26 @@
 //----------Arithmetic Conversion Instructions---------------------------------
 // The conversions operations are all Alpha sorted.  Please keep it that way!
 
-instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
+instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (RoundFloat src));
   ins_cost(125);
   format %{ "FST_S  $dst,$src\t# F-round" %}
-  ins_encode( Pop_Mem_Reg_F(dst, src) );
+  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
+instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (RoundDouble src));
   ins_cost(125);
   format %{ "FST_D  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Mem_Reg_D(dst, src) );
+  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2F_reg(stackSlotF dst, regD src) %{
+instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
   predicate(UseSSE==0);
   match(Set dst (ConvD2F src));
   format %{ "FST_S  $dst,$src\t# F-round" %}
@@ -11862,7 +11066,7 @@
 %}
 
 // Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
+instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
   predicate(UseSSE==1);
   match(Set dst (ConvD2F src));
   effect( KILL cr );
@@ -11870,29 +11074,40 @@
             "FST_S  [ESP],$src\t# F-round\n\t"
             "MOVSS  $dst,[ESP]\n\t"
             "ADD ESP,4" %}
-  ins_encode( D2X_encoding(dst, src) );
+  ins_encode %{
+    __ subptr(rsp, 4);
+    if ($src$$reg != FPR1L_enc) {
+      __ fld_s($src$$reg-1);
+      __ fstp_s(Address(rsp, 0));
+    } else {
+      __ fst_s(Address(rsp, 0));
+    }
+    __ movflt($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 4);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Force rounding double precision to single precision
-instruct convXD2X_reg(regX dst, regXD src) %{
+instruct convD2F_reg(regF dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvD2F src));
   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convF2D_reg_reg(regD dst, regF src) %{
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2D src));
   format %{ "FST_S  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Reg_Reg_D(dst, src));
+  ins_encode( Pop_Reg_Reg_DPR(dst, src));
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct convF2D_reg(stackSlotD dst, regF src) %{
+instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
   predicate(UseSSE==1);
   match(Set dst (ConvF2D src));
   format %{ "FST_D  $dst,$src\t# D-round" %}
@@ -11901,7 +11116,7 @@
   %}
 %}
 
-instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
+instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
   predicate(UseSSE==1);
   match(Set dst (ConvF2D src));
   effect( KILL cr );
@@ -11910,21 +11125,28 @@
             "FLD_S  [ESP]\n\t"
             "ADD    ESP,4\n\t"
             "FSTP   $dst\t# D-round" %}
-  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convX2XD_reg(regXD dst, regX src) %{
+  ins_encode %{
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ fstp_d($dst$$reg);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convF2D_reg(regD dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvF2D src));
   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
+instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
   predicate(UseSSE<=1);
   match(Set dst (ConvD2I src));
   effect( KILL tmp, KILL cr );
@@ -11939,12 +11161,12 @@
             "FLD_D  $src\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
+  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
+instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvD2I src));
   effect( KILL tmp, KILL cr );
@@ -11957,12 +11179,22 @@
             "ADD    ESP, 8\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  opcode(0x1); // double-precision conversion
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
+  ins_encode %{
+    Label fast;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ addptr(rsp, 8);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
   predicate(UseSSE<=1);
   match(Set dst (ConvD2L src));
   effect( KILL cr );
@@ -11980,12 +11212,12 @@
             "FLD    $src\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
+  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
+instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
   predicate (UseSSE>=2);
   match(Set dst (ConvD2L src));
   effect( KILL cr );
@@ -12004,9 +11236,36 @@
             "SUB    ESP,8\n\t"
             "MOVSD  [ESP],$src\n\t"
             "FLD_D  [ESP]\n\t"
+            "ADD    ESP,8\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( XD2L_encoding(src) );
+  ins_encode %{
+    Label fast;
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+    __ fistp_d(Address(rsp, 0));
+    // Restore the rounding mode, mask the exception
+    if (Compile::current()->in_24_bit_fp_mode()) {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+    } else {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+    }
+    // Load the converted long, adjust CPU stack
+    __ pop(rax);
+    __ pop(rdx);
+    __ cmpl(rdx, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ testl(rax, rax);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ addptr(rsp, 8);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+    __ bind(fast);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12016,7 +11275,7 @@
 // rounding mode to 'nearest'.  The hardware stores a flag value down
 // if we would overflow or converted a NAN; we check for this and
 // and go the slow path if needed.
-instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
+instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2I src));
   effect( KILL tmp, KILL cr );
@@ -12031,13 +11290,13 @@
             "FLD    $src\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  // D2I_encoding works for F2I
-  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
+  // DPR2I_encoding works for FPR2I
+  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // Convert a float in xmm to an int reg.
-instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
+instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
   predicate(UseSSE>=1);
   match(Set dst (ConvF2I src));
   effect( KILL tmp, KILL cr );
@@ -12050,12 +11309,22 @@
             "ADD    ESP, 4\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  opcode(0x0); // single-precision conversion
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
+  ins_encode %{
+    Label fast;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2L src));
   effect( KILL cr );
@@ -12073,13 +11342,13 @@
             "FLD    $src\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  // D2L_encoding works for F2L
-  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
+  // DPR2L_encoding works for FPR2L
+  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
+instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
   predicate (UseSSE>=1);
   match(Set dst (ConvF2L src));
   effect( KILL cr );
@@ -12101,39 +11370,67 @@
             "ADD    ESP,4\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( X2L_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2D_reg(regD dst, stackSlotI src) %{
+  ins_encode %{
+    Label fast;
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+    __ fistp_d(Address(rsp, 0));
+    // Restore the rounding mode, mask the exception
+    if (Compile::current()->in_24_bit_fp_mode()) {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+    } else {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+    }
+    // Load the converted long, adjust CPU stack
+    __ pop(rax);
+    __ pop(rdx);
+    __ cmpl(rdx, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ testl(rax, rax);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
   predicate( UseSSE<=1 );
   match(Set dst (ConvI2D src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
+  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct convI2XD_reg(regXD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, eRegI src) %{
   predicate( UseSSE>=2 && !UseXmmI2D );
   match(Set dst (ConvI2D src));
   format %{ "CVTSI2SD $dst,$src" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2XD_mem(regXD dst, memory mem) %{
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2D_mem(regD dst, memory mem) %{
   predicate( UseSSE>=2 );
   match(Set dst (ConvI2D (LoadI mem)));
   format %{ "CVTSI2SD $dst,$mem" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convXI2XD_reg(regXD dst, eRegI src)
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convXI2D_reg(regD dst, eRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2D );
   match(Set dst (ConvI2D src));
@@ -12147,31 +11444,31 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-instruct convI2D_mem(regD dst, memory mem) %{
+instruct convI2DPR_mem(regDPR dst, memory mem) %{
   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2D (LoadI mem)));
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_D(dst));
+              Pop_Reg_DPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // Convert a byte to a float; no rounding step needed.
-instruct conv24I2F_reg(regF dst, stackSlotI src) %{
+instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
   match(Set dst (ConvI2F src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
 
   opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
+  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
+instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F src));
   ins_cost(200);
@@ -12179,12 +11476,12 @@
             "FSTP_S $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
   ins_encode( Push_Mem_I(src),
-              Pop_Mem_F(dst));
+              Pop_Mem_FPR(dst));
   ins_pipe( fpu_mem_mem );
 %}
 
 // In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
+instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F (LoadI mem)));
   ins_cost(200);
@@ -12192,46 +11489,46 @@
             "FSTP_S $dst" %}
   opcode(0xDB);  /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Mem_F(dst));
+              Pop_Mem_FPR(dst));
   ins_pipe( fpu_mem_mem );
 %}
 
 // This instruction does not round to 24-bits
-instruct convI2F_reg(regF dst, stackSlotI src) %{
+instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
   ins_encode( Push_Mem_I(src),
-              Pop_Reg_F(dst));
+              Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // This instruction does not round to 24-bits
-instruct convI2F_mem(regF dst, memory mem) %{
+instruct convI2FPR_mem(regFPR dst, memory mem) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F (LoadI mem)));
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_F(dst));
+              Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // Convert an int to a float in xmm; no rounding step needed.
-instruct convI2X_reg(regX dst, eRegI src) %{
+instruct convI2F_reg(regF dst, eRegI src) %{
   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
   match(Set dst (ConvI2F src));
   format %{ "CVTSI2SS $dst, $src" %}
-
-  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
- instruct convXI2X_reg(regX dst, eRegI src)
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+ instruct convXI2F_reg(regF dst, eRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2F );
   match(Set dst (ConvI2F src));
@@ -12280,7 +11577,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE<=1);
   match(Set dst (ConvL2D src));
   effect( KILL cr );
@@ -12290,11 +11587,11 @@
             "ADD    ESP,8\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (ConvL2D src));
   effect( KILL cr );
@@ -12305,11 +11602,11 @@
             "MOVSD  $dst,[ESP]\n\t"
             "ADD    ESP,8" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double2(src), Push_ResultD(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE>=1);
   match(Set dst (ConvL2F src));
   effect( KILL cr );
@@ -12320,11 +11617,11 @@
             "MOVSS  $dst,[ESP]\n\t"
             "ADD    ESP,8" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (ConvL2F src));
   effect( KILL cr );
   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
@@ -12333,7 +11630,7 @@
             "ADD    ESP,8\n\t"
             "FSTP_S $dst\t# F-round" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
+  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
   ins_pipe( pipe_slow );
 %}
 
@@ -12351,40 +11648,45 @@
   effect( DEF dst, USE src );
   ins_cost(100);
   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
 
   ins_cost(125);
   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_F(dst, src) );
+  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
+instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
   predicate(UseSSE>=1);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
 
   ins_cost(95);
   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
   ins_cost(85);
   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
-  ins_encode( MovX2I_reg(dst, src));
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12394,13 +11696,14 @@
 
   ins_cost(100);
   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode( OpcPRegSS( dst, src ) );
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
 
 
-instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
   predicate(UseSSE==0);
   match(Set dst (MoveI2F src));
   effect(DEF dst, USE src);
@@ -12410,29 +11713,33 @@
             "FSTP   $dst\t# MoveI2F_stack_reg" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
+instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
   predicate(UseSSE>=1);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
   ins_cost(95);
   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
   ins_cost(85);
   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
-  ins_encode( MovI2X_reg(dst, src) );
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12448,29 +11755,30 @@
   ins_pipe( ialu_mem_long_reg );
 %}
 
-instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src);
 
   ins_cost(125);
   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_D(dst, src) );
+  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
+instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src);
   ins_cost(95);
-
   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src, TEMP tmp);
@@ -12478,7 +11786,11 @@
   format %{ "MOVD   $dst.lo,$src\n\t"
             "PSHUFLW $tmp,$src,0x4E\n\t"
             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
-  ins_encode( MovXD2L_reg(dst, src, tmp) );
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12495,7 +11807,7 @@
 %}
 
 
-instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
   predicate(UseSSE<=1);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
@@ -12505,34 +11817,38 @@
             "FSTP   $dst\t# MoveL2D_stack_reg" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 
-instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
 
   ins_cost(95);
   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
 
   ins_cost(95);
   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveL2D src));
   effect(TEMP dst, USE src, TEMP tmp);
@@ -12540,149 +11856,192 @@
   format %{ "MOVD   $dst,$src.lo\n\t"
             "MOVD   $tmp,$src.hi\n\t"
             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
-  ins_encode( MovL2XD_reg(dst, src, tmp) );
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regXD dst, regXD src) %{
+instruct Repl8B_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B src));
   format %{ "MOVDQA  $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regXD dst, eRegI src) %{
+instruct Repl8B_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B src));
   format %{ "MOVD    $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regXD dst, immI0 zero) %{
+instruct Repl8B_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B zero));
   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_reg(regXD dst, regXD src) %{
+instruct Repl4S_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_eRegI(regXD dst, eRegI src) %{
+instruct Repl4S_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regXD dst, immI0 zero) %{
+instruct Repl4S_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S zero));
   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regXD dst, regXD src) %{
+instruct Repl4C_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regXD dst, eRegI src) %{
+instruct Repl4C_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regXD dst, immI0 zero) %{
+instruct Repl4C_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C zero));
   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regXD dst, regXD src) %{
+instruct Repl2I_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regXD dst, eRegI src) %{
+instruct Repl2I_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I src));
   format %{ "MOVD   $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar zero to packed integer (2 byte) values in xmm
-instruct Repl2I_immI0(regXD dst, immI0 zero) %{
+instruct Repl2I_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I zero));
   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regXD dst, regXD src) %{
+instruct Repl2F_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regX(regXD dst, regX src) %{
+instruct Repl2F_regF(regD dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
+instruct Repl2F_immF0(regD dst, immF0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F zero));
   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -12702,7 +12061,7 @@
 %}
 
 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                        eAXRegI result, regXD tmp1, eFlagsReg cr) %{
+                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 
@@ -12717,7 +12076,7 @@
 
 // fast string equals
 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
-                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
+                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
   match(Set result (StrEquals (Binary str1 str2) cnt));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
 
@@ -12732,7 +12091,7 @@
 
 // fast search of substring with known size.
 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
@@ -12759,7 +12118,7 @@
 %}
 
 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
+                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
@@ -12776,7 +12135,7 @@
 
 // fast array equals
 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
 %{
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@@ -13602,27 +12961,36 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13630,15 +12998,6 @@
   %}
 %}
 
-instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
 //======
 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
@@ -13730,27 +13089,36 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13758,15 +13126,6 @@
   %}
 %}
 
-instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
 //======
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LEGT except must negate src
@@ -13863,27 +13222,37 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
-  %}
-%}
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
+  %}
+%}
+
 
 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13892,16 +13261,6 @@
 %}
 
 
-instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
-
 // ============================================================================
 // Procedure Call/Return Instructions
 // Call Java Static Instruction
@@ -14076,20 +13435,20 @@
 // inlined locking and unlocking
 
 
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
+instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
   match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr );
+  effect( TEMP tmp, TEMP scr, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode( Fast_Lock(object,box,tmp,scr) );
   ins_pipe( pipe_slow );
 %}
 
 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
   match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp );
+  effect( TEMP tmp, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTUNLOCK $object, $box, $tmp" %}
+  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
   ins_encode( Fast_Unlock(object,box,tmp) );
   ins_pipe( pipe_slow );
 %}
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 18:00:12 2017 +0200
@@ -552,7 +552,7 @@
 #define __ _masm.
 
 static int preserve_SP_size() {
-  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
+  return 3;  // rex.w, op, rm(reg/reg)
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -797,48 +797,35 @@
   }
 }
 
-void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
-{
-  if (dstenc != srcenc) {
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  }
-}
-
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    MacroAssembler _masm(&cbuf);
-
-    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
-  }
-}
-
 // This could be in MacroAssembler but it's fairly C2 specific
 void emit_cmpfp_fixup(MacroAssembler& _masm) {
   Label exit;
   __ jccb(Assembler::noParity, exit);
   __ pushf();
+  //
+  // comiss/ucomiss instructions set ZF,PF,CF flags and
+  // zero OF,AF,SF for NaN values.
+  // Fixup flags by zeroing ZF,PF so that compare of NaN
+  // values returns 'less than' result (CF is set).
+  // Leave the rest of flags unchanged.
+  //
+  //    7 6 5 4 3 2 1 0
+  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
+  //    0 0 1 0 1 0 1 1   (0x2B)
+  //
   __ andq(Address(rsp, 0), 0xffffff2b);
   __ popf();
   __ bind(exit);
-  __ nop(); // (target for branch to avoid branch to branch)
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+  Label done;
+  __ movl(dst, -1);
+  __ jcc(Assembler::parity, done);
+  __ jcc(Assembler::below, done);
+  __ setb(Assembler::notEqual, dst);
+  __ movzbl(dst, dst);
+  __ bind(done);
 }
 
 
@@ -1274,16 +1261,8 @@
         // 64-bit
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, [rsp + #%d]\t# spill",
@@ -1294,25 +1273,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x10);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   %s, [rsp + #%d]\t# spill",
@@ -1322,9 +1293,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     }
   } else if (src_first_rc == rc_int) {
@@ -1450,25 +1421,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WB);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WR);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1482,23 +1436,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1507,9 +1446,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     }
   } else if (src_first_rc == rc_float) {
@@ -1521,16 +1460,8 @@
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF2);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movsd   [rsp + #%d], %s\t# spill",
@@ -1540,25 +1471,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   [rsp + #%d], %s\t# spill",
@@ -1568,9 +1491,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >=8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     } else if (dst_first_rc == rc_int) {
       // xmm -> gpr
@@ -1578,25 +1501,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1610,23 +1516,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_B); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1635,9 +1526,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     } else if (dst_first_rc == rc_float) {
       // xmm -> xmm
@@ -1645,23 +1536,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1671,32 +1547,16 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          if (!UseXmmRegToRegMoveAll)
-            emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1705,10 +1565,10 @@
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? (UseXmmRegToRegMoveAll ? 3 : 4)
-          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
+        return ((UseAVX>0) ? 5:
+          ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+           ? (UseXmmRegToRegMoveAll ? 4 : 5)
+           : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
       }
     }
   }
@@ -2205,47 +2065,6 @@
     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
   %}
 
-  enc_class cmpfp_fixup() %{
-      MacroAssembler _masm(&cbuf);
-      emit_cmpfp_fixup(_masm);
-  %}
-
-  enc_class cmpfp3(rRegI dst)
-  %{
-    int dstenc = $dst$$reg;
-
-    // movl $dst, -1
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
-    emit_d32(cbuf, -1);
-
-    // jp,s done
-    emit_opcode(cbuf, 0x7A);
-    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
-
-    // jb,s done
-    emit_opcode(cbuf, 0x72);
-    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
-
-    // setne $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x95);
-    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
-
-    // movzbl $dst, $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0xB6);
-    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
-  %}
-
   enc_class cdql_enc(no_rax_rdx_RegI div)
   %{
     // Full implementation of Java idiv and irem; checks for
@@ -2472,55 +2291,6 @@
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
   %}
 
-  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
-                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
-                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
-    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
-    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
-  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
-
-    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
   enc_class enc_PartialSubtypeCheck()
   %{
     Register Rrdi = as_Register(RDI_enc); // result register
@@ -2751,68 +2521,6 @@
     }
   %}
 
-  // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_copy(rRegI dst, rRegI src)
-  %{
-    encode_copy(cbuf, $dst$$reg, $src$$reg);
-  %}
-
-  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_CopyXD( RegD dst, RegD src ) %{
-    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
-  %}
-
-  enc_class enc_copy_always(rRegI dst, rRegI src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  %}
-
-  enc_class enc_copy_wide(rRegL dst, rRegL src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc != srcenc) {
-      if (dstenc < 8) {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_W);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WB);
-          srcenc -= 8;
-        }
-      } else {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_WR);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WRB);
-          srcenc -= 8;
-        }
-        dstenc -= 8;
-      }
-      emit_opcode(cbuf, 0x8B);
-      emit_rm(cbuf, 0x3, dstenc, srcenc);
-    }
-  %}
-
   enc_class Con32(immI src)
   %{
     // Output immediate
@@ -3212,92 +2920,19 @@
   %}
 
   enc_class Push_ResultXD(regD dst) %{
-    int dstenc = $dst$$reg;
-
-    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
-
-    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
-    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
-
-    // add rsp,8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf,0x83);
-    emit_rm(cbuf,0x3, 0x0, RSP_enc);
-    emit_d8(cbuf,0x08);
+    MacroAssembler _masm(&cbuf);
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
   %}
 
   enc_class Push_SrcXD(regD src) %{
-    int srcenc = $src$$reg;
-
-    // subq rsp,#8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 0x8);
-
-    // movsd [rsp],src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
-
-    // fldd [rsp]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0xDD);
-    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
-  %}
-
-
-  enc_class movq_ld(regD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-
-  enc_class movq_st(memory mem, regD src) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-
-  enc_class pshufd_8x8(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
-    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
-  %}
-
-  enc_class pshufd_4x16(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
-  %}
-
-  enc_class pshufd(regD dst, regD src, int mode) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
-  %}
-
-  enc_class pxor(regD dst, regD src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
-  %}
-
-  enc_class mov_i2x(regD dst, rRegI src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
-  %}
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+  %}
+
 
   // obj: object to lock
   // box: box address (header location) -- killed
@@ -3534,303 +3169,6 @@
                    RELOC_DISP32);
   %}
 
-  enc_class absF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::float_sign_mask();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class absD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::double_sign_mask();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class negF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::float_sign_flip();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class negD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::double_sign_flip();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class f2i_fixup(rRegI dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class f2l_fixup(rRegL dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2i_fixup(rRegI dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2l_fixup(rRegL dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
 %}
 
 
@@ -6156,8 +5494,9 @@
 
   ins_cost(145); // XXX
   format %{ "movss   $dst, $mem\t# float" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6169,8 +5508,9 @@
 
   ins_cost(145); // XXX
   format %{ "movlpd  $dst, $mem\t# double" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6181,8 +5521,9 @@
 
   ins_cost(145); // XXX
   format %{ "movsd   $dst, $mem\t# double" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6191,7 +5532,9 @@
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6200,7 +5543,9 @@
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6209,7 +5554,9 @@
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6218,16 +5565,20 @@
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Single to XMM
 instruct loadA2F(regD dst, memory mem) %{
   match(Set dst (Load2F mem));
-  ins_cost(145);
+  ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6540,8 +5891,9 @@
   ins_cost(100);
 
   format %{ "xorps   $dst, $dst\t# float 0.0" %}
-  opcode(0x0F, 0x57);
-  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6562,8 +5914,9 @@
   ins_cost(100);
 
   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
-  opcode(0x66, 0x0F, 0x57);
-  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6606,8 +5959,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6972,7 +6326,9 @@
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6981,7 +6337,9 @@
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6990,7 +6348,9 @@
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7024,7 +6384,9 @@
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7035,8 +6397,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $mem, $src\t# float" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7072,8 +6435,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $mem, $src\t# double" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7142,8 +6506,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7153,8 +6518,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# double stk" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7444,6 +6810,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Move Instructions--------------------------------------------------
 
 instruct castX2P(rRegP dst, rRegL src)
@@ -7451,7 +6827,11 @@
   match(Set dst (CastX2P src));
 
   format %{ "movq    $dst, $src\t# long->ptr" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7460,7 +6840,11 @@
   match(Set dst (CastP2X src));
 
   format %{ "movq    $dst, $src\t# ptr -> long" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7813,7 +7197,13 @@
   format %{ "jn$cop    skip\t# signed cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7837,7 +7227,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7857,7 +7253,13 @@
   format %{ "jn$cop    skip\t# signed cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7869,7 +7271,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10191,17 +9599,18 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
   match(Set cr (CmpF src1 src2));
 
-  ins_cost(145);
+  ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9628,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10231,8 +9641,9 @@
 
   ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10245,7 +9656,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10273,10 +9684,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10301,10 +9713,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10313,8 +9726,9 @@
 
   ins_cost(100);
   format %{ "ucomisd $src1, $src2" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10327,7 +9741,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10359,10 +9773,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10380,10 +9794,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10401,15 +9815,8 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Label L_done;
-    Register Rdst = $dst$$Register;
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10428,10 +9835,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10449,10 +9856,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10470,374 +9877,9 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Register Rdst = $dst$$Register;
-    Label L_done;
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_reg(regF dst, regF src)
-%{
-  match(Set dst (AddF dst src));
-
-  format %{ "addss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_mem(regF dst, memory src)
-%{
-  match(Set dst (AddF dst (LoadF src)));
-
-  format %{ "addss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_imm(regF dst, immF con) %{
-  match(Set dst (AddF dst con));
-  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ addss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_reg(regD dst, regD src)
-%{
-  match(Set dst (AddD dst src));
-
-  format %{ "addsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_mem(regD dst, memory src)
-%{
-  match(Set dst (AddD dst (LoadD src)));
-
-  format %{ "addsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_imm(regD dst, immD con) %{
-  match(Set dst (AddD dst con));
-  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ addsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_reg(regF dst, regF src)
-%{
-  match(Set dst (SubF dst src));
-
-  format %{ "subss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_mem(regF dst, memory src)
-%{
-  match(Set dst (SubF dst (LoadF src)));
-
-  format %{ "subss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_imm(regF dst, immF con) %{
-  match(Set dst (SubF dst con));
-  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ subss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_reg(regD dst, regD src)
-%{
-  match(Set dst (SubD dst src));
-
-  format %{ "subsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_mem(regD dst, memory src)
-%{
-  match(Set dst (SubD dst (LoadD src)));
-
-  format %{ "subsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_imm(regD dst, immD con) %{
-  match(Set dst (SubD dst con));
-  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ subsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_reg(regF dst, regF src)
-%{
-  match(Set dst (MulF dst src));
-
-  format %{ "mulss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_mem(regF dst, memory src)
-%{
-  match(Set dst (MulF dst (LoadF src)));
-
-  format %{ "mulss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_imm(regF dst, immF con) %{
-  match(Set dst (MulF dst con));
-  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ mulss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_reg(regD dst, regD src)
-%{
-  match(Set dst (MulD dst src));
-
-  format %{ "mulsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_mem(regD dst, memory src)
-%{
-  match(Set dst (MulD dst (LoadD src)));
-
-  format %{ "mulsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_imm(regD dst, immD con) %{
-  match(Set dst (MulD dst con));
-  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ mulsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_reg(regF dst, regF src)
-%{
-  match(Set dst (DivF dst src));
-
-  format %{ "divss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_mem(regF dst, memory src)
-%{
-  match(Set dst (DivF dst (LoadF src)));
-
-  format %{ "divss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_imm(regF dst, immF con) %{
-  match(Set dst (DivF dst con));
-  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ divss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_reg(regD dst, regD src)
-%{
-  match(Set dst (DivD dst src));
-
-  format %{ "divsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_mem(regD dst, memory src)
-%{
-  match(Set dst (DivD dst (LoadD src)));
-
-  format %{ "divsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_imm(regD dst, immD con) %{
-  match(Set dst (DivD dst con));
-  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ divsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_reg(regF dst, regF src)
-%{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-
-  format %{ "sqrtss  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_mem(regF dst, memory src)
-%{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
-
-  format %{ "sqrtss  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_imm(regF dst, immF con) %{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
-  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_reg(regD dst, regD src)
-%{
-  match(Set dst (SqrtD src));
-
-  format %{ "sqrtsd  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_mem(regD dst, memory src)
-%{
-  match(Set dst (SqrtD (LoadD src)));
-
-  format %{ "sqrtsd  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_imm(regD dst, immD con) %{
-  match(Set dst (SqrtD con));
-  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg(regF dst)
-%{
-  match(Set dst (AbsF dst));
-
-  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode(absF_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg(regD dst)
-%{
-  match(Set dst (AbsD dst));
-
-  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
-            "# abs double by sign masking" %}
-  ins_encode(absD_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct negF_reg(regF dst)
-%{
-  match(Set dst (NegF dst));
-
-  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
-  ins_encode(negF_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct negD_reg(regD dst)
-%{
-  match(Set dst (NegD dst));
-
-  format %{ "xorpd   $dst, [0x8000000000000000]\t"
-            "# neg double by sign flipping" %}
-  ins_encode(negD_encoding(dst));
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10929,8 +9971,9 @@
   match(Set dst (ConvF2D src));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10939,8 +9982,9 @@
   match(Set dst (ConvF2D (LoadF src)));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10949,8 +9993,9 @@
   match(Set dst (ConvD2F src));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10959,8 +10004,9 @@
   match(Set dst (ConvD2F (LoadD src)));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10978,9 +10024,17 @@
             "call    f2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10997,9 +10051,18 @@
             "call    f2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11016,9 +10079,17 @@
             "call    d2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11035,9 +10106,18 @@
             "call    d2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11047,8 +10127,9 @@
   match(Set dst (ConvI2F src));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11057,8 +10138,9 @@
   match(Set dst (ConvI2F (LoadI src)));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11068,8 +10150,9 @@
   match(Set dst (ConvI2D src));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11078,8 +10161,9 @@
   match(Set dst (ConvI2D (LoadI src)));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11116,8 +10200,9 @@
   match(Set dst (ConvL2F src));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11126,8 +10211,9 @@
   match(Set dst (ConvL2F (LoadL src)));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11136,8 +10222,9 @@
   match(Set dst (ConvL2D src));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11146,8 +10233,9 @@
   match(Set dst (ConvL2D (LoadL src)));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11186,7 +10274,11 @@
   match(Set dst (AndL (ConvI2L src) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  ins_encode(enc_copy(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movl($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11196,8 +10288,9 @@
   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Address);
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11206,7 +10299,9 @@
   match(Set dst (AndL src mask));
 
   format %{ "movl    $dst, $src\t# zero-extend long" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11215,7 +10310,9 @@
   match(Set dst (ConvL2I src));
 
   format %{ "movl    $dst, $src\t# l2i" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11226,8 +10323,9 @@
 
   ins_cost(125);
   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11237,8 +10335,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11248,8 +10347,9 @@
 
   ins_cost(125);
   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movq($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11260,8 +10360,9 @@
 
   ins_cost(125);
   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11272,8 +10373,9 @@
 
   ins_cost(125);
   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11284,8 +10386,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11295,8 +10398,9 @@
 
   ins_cost(100);
   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -11306,8 +10410,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11317,8 +10422,9 @@
 
   ins_cost(100);
   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movq(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe(ialu_mem_reg);
 %}
 
@@ -11327,7 +10433,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveF2I" %}
-  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11336,7 +10444,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveD2L" %}
-  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdq($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11346,7 +10456,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
-  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11355,7 +10467,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
-  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+     __ movdq($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11365,7 +10479,13 @@
   format %{ "MOVDQA  $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11375,7 +10495,11 @@
   format %{ "MOVD    $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11383,7 +10507,9 @@
 instruct Repl8B_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate8B zero));
   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11391,7 +10517,9 @@
 instruct Repl4S_reg(regD dst, regD src) %{
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11400,7 +10528,10 @@
   match(Set dst (Replicate4S src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11408,7 +10539,9 @@
 instruct Repl4S_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4S zero));
   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11416,7 +10549,9 @@
 instruct Repl4C_reg(regD dst, regD src) %{
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11425,7 +10560,10 @@
   match(Set dst (Replicate4C src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11433,7 +10571,9 @@
 instruct Repl4C_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4C zero));
   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11441,7 +10581,9 @@
 instruct Repl2I_reg(regD dst, regD src) %{
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11450,7 +10592,10 @@
   match(Set dst (Replicate2I src));
   format %{ "MOVD   $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11458,7 +10603,9 @@
 instruct Repl2I_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate2I zero));
   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11466,7 +10613,9 @@
 instruct Repl2F_reg(regD dst, regD src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11474,7 +10623,9 @@
 instruct Repl2F_regF(regD dst, regF src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11482,7 +10633,9 @@
 instruct Repl2F_immF0(regD dst, immF0 zero) %{
   match(Set dst (Replicate2F zero));
   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -12162,12 +11315,12 @@
   effect(KILL rcx, KILL cr);
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
-            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
     "miss:\t" %}
 
@@ -12185,12 +11338,12 @@
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);
-  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
             "jne,s   miss\t\t# Missed: flags nz\n\t"
-            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
     "miss:\t" %}
 
   opcode(0x0); // No need to XOR RDI
@@ -12358,13 +11511,13 @@
 // inlined locking and unlocking
 
 instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
+                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
 %{
   match(Set cr (FastLock object box));
-  effect(TEMP tmp, TEMP scr);
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastlock $object,$box,$tmp,$scr" %}
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode(Fast_Lock(object, box, tmp, scr));
   ins_pipe(pipe_slow);
 %}
@@ -12373,10 +11526,10 @@
                        rRegP object, rax_RegP box, rRegP tmp)
 %{
   match(Set cr (FastUnlock object box));
-  effect(TEMP tmp);
+  effect(TEMP tmp, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastunlock $object, $box, $tmp" %}
+  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
   ins_encode(Fast_Unlock(object, box, tmp));
   ins_pipe(pipe_slow);
 %}
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2835,7 +2835,7 @@
 #endif
 }
 
-void os::free_memory(char *addr, size_t bytes) {
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
   ::madvise(addr, bytes, MADV_DONTNEED);
 }
 
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2546,8 +2546,8 @@
   }
 }
 
-void os::free_memory(char *addr, size_t bytes) {
-  commit_memory(addr, bytes, false);
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  commit_memory(addr, bytes, alignment_hint, false);
 }
 
 void os::numa_make_global(char *addr, size_t bytes) {
--- a/hotspot/src/os/posix/vm/os_posix.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os/posix/vm/os_posix.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -59,6 +59,10 @@
   VMError::report_coredump_status(buffer, success);
 }
 
+int os::get_last_error() {
+  return errno;
+}
+
 bool os::is_debugger_attached() {
   // not implemented
   return false;
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2821,7 +2821,7 @@
 }
 
 // Uncommit the pages in a specified region.
-void os::free_memory(char* addr, size_t bytes) {
+void os::free_memory(char* addr, size_t bytes, size_t alignment_hint) {
   if (madvise(addr, bytes, MADV_FREE) < 0) {
     debug_only(warning("MADV_FREE failed."));
     return;
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -132,7 +132,6 @@
 // save DLL module handle, used by GetModuleFileName
 
 HINSTANCE vm_lib_handle;
-static int getLastErrorString(char *buf, size_t len);
 
 BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) {
   switch (reason) {
@@ -1452,7 +1451,7 @@
     return result;
   }
 
-  long errcode = GetLastError();
+  DWORD errcode = GetLastError();
   if (errcode == ERROR_MOD_NOT_FOUND) {
     strncpy(ebuf, "Can't find dependent libraries", ebuflen-1);
     ebuf[ebuflen-1]='\0';
@@ -1463,11 +1462,11 @@
   // If we can read dll-info and find that dll was built
   // for an architecture other than Hotspot is running in
   // - then print to buffer "DLL was built for a different architecture"
-  // else call getLastErrorString to obtain system error message
+  // else call os::lasterror to obtain system error message
 
   // Read system error message into ebuf
   // It may or may not be overwritten below (in the for loop and just above)
-  getLastErrorString(ebuf, (size_t) ebuflen);
+  lasterror(ebuf, (size_t) ebuflen);
   ebuf[ebuflen-1]='\0';
   int file_descriptor=::open(name, O_RDONLY | O_BINARY, 0);
   if (file_descriptor<0)
@@ -1500,7 +1499,7 @@
   ::close(file_descriptor);
   if (failed_to_get_lib_arch)
   {
-    // file i/o error - report getLastErrorString(...) msg
+    // file i/o error - report os::lasterror(...) msg
     return NULL;
   }
 
@@ -1543,7 +1542,7 @@
     "Didn't find runing architecture code in arch_array");
 
   // If the architure is right
-  // but some other error took place - report getLastErrorString(...) msg
+  // but some other error took place - report os::lasterror(...) msg
   if (lib_arch == running_arch)
   {
     return NULL;
@@ -1775,12 +1774,12 @@
 // This method is a copy of JDK's sysGetLastErrorString
 // from src/windows/hpi/src/system_md.c
 
-size_t os::lasterror(char *buf, size_t len) {
-  long errval;
+size_t os::lasterror(char* buf, size_t len) {
+  DWORD errval;
 
   if ((errval = GetLastError()) != 0) {
-      /* DOS error */
-    int n = (int)FormatMessage(
+    // DOS error
+    size_t n = (size_t)FormatMessage(
           FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
           NULL,
           errval,
@@ -1789,7 +1788,7 @@
           (DWORD)len,
           NULL);
     if (n > 3) {
-      /* Drop final '.', CR, LF */
+      // Drop final '.', CR, LF
       if (buf[n - 1] == '\n') n--;
       if (buf[n - 1] == '\r') n--;
       if (buf[n - 1] == '.') n--;
@@ -1799,17 +1798,25 @@
   }
 
   if (errno != 0) {
-    /* C runtime error that has no corresponding DOS error code */
-    const char *s = strerror(errno);
+    // C runtime error that has no corresponding DOS error code
+    const char* s = strerror(errno);
     size_t n = strlen(s);
     if (n >= len) n = len - 1;
     strncpy(buf, s, n);
     buf[n] = '\0';
     return n;
   }
+
   return 0;
 }
 
+int os::get_last_error() {
+  DWORD error = GetLastError();
+  if (error == 0)
+    error = errno;
+  return (int)error;
+}
+
 // sun.misc.Signal
 // NOTE that this is a workaround for an apparent kernel bug where if
 // a signal handler for SIGBREAK is installed then that signal handler
@@ -3130,7 +3137,7 @@
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes)         { }
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint)    { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
@@ -4746,7 +4753,7 @@
           fatal("corrupted C heap");
         }
       }
-      int err = GetLastError();
+      DWORD err = GetLastError();
       if (err != ERROR_NO_MORE_ITEMS && err != ERROR_CALL_NOT_IMPLEMENTED) {
         fatal(err_msg("heap walk aborted with error %d", err));
       }
@@ -4778,45 +4785,6 @@
   return EXCEPTION_CONTINUE_SEARCH;
 }
 
-static int getLastErrorString(char *buf, size_t len)
-{
-    long errval;
-
-    if ((errval = GetLastError()) != 0)
-    {
-      /* DOS error */
-      size_t n = (size_t)FormatMessage(
-            FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
-            NULL,
-            errval,
-            0,
-            buf,
-            (DWORD)len,
-            NULL);
-      if (n > 3) {
-        /* Drop final '.', CR, LF */
-        if (buf[n - 1] == '\n') n--;
-        if (buf[n - 1] == '\r') n--;
-        if (buf[n - 1] == '.') n--;
-        buf[n] = '\0';
-      }
-      return (int)n;
-    }
-
-    if (errno != 0)
-    {
-      /* C runtime error that has no corresponding DOS error code */
-      const char *s = strerror(errno);
-      size_t n = strlen(s);
-      if (n >= len) n = len - 1;
-      strncpy(buf, s, n);
-      buf[n] = '\0';
-      return (int)n;
-    }
-    return 0;
-}
-
-
 // We don't build a headless jre for Windows
 bool os::is_headless_jre() { return false; }
 
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -28,6 +28,8 @@
   static void setup_fpu();
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+#define OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// See http://www.technovelty.org/code/c/reading-rdtsc.htl for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+  // 64 bit result in edx:eax
+  uint64_t res;
+  __asm__ __volatile__ ("rdtsc" : "=A" (res));
+  return (jlong)res;
+#else
+  uint64_t res;
+  uint32_t ts1, ts2;
+  __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+  res = ((uint64_t)ts1 | (uint64_t)ts2 << 32);
+  return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
--- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,8 @@
   static void setup_fpu();
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+#define OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// See http://www.technovelty.org/code/c/reading-rdtsc.htl for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+  // 64 bit result in edx:eax
+  uint64_t res;
+  __asm__ __volatile__ ("rdtsc" : "=A" (res));
+  return (jlong)res;
+#else
+  uint64_t res;
+  uint32_t ts1, ts2;
+  __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+  res = ((uint64_t)ts1 | (uint64_t)ts2 << 32);
+  return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -46,6 +46,8 @@
 
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+#define OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() { return _raw_rdtsc(); }
+
+#endif // OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
--- a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Wed Jul 05 18:00:12 2017 +0200
@@ -43,6 +43,11 @@
       movl     %ebp, %eax 
       .end
 
+  // Support for os::rdtsc()
+      .inline _raw_rdtsc,0
+      rdtsc
+      .end
+
   // Support for jint Atomic::add(jint inc, volatile jint* dest)
   // An additional bool (os::is_MP()) is passed as the last argument.
       .inline _Atomic_add,3
@@ -113,7 +118,6 @@
       fistpll   (%eax)
       .end
 
-
   // Support for OrderAccess::acquire()
       .inline _OrderAccess_acquire,0
       movl     0(%esp), %eax
--- a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Wed Jul 05 18:00:12 2017 +0200
@@ -30,12 +30,19 @@
       movq     %fs:0, %rax 
       .end
 
-  // Get the frame pointer from current frame.
+  // Get current fp
       .inline _get_current_fp,0
       .volatile
       movq     %rbp, %rax 
       .end
 
+  // Support for os::rdtsc()
+      .inline _raw_rdtsc,0
+      rdtsc
+      salq     $32, %rdx
+      orq      %rdx, %rax
+      .end
+
   // Support for jint Atomic::add(jint add_value, volatile jint* dest)
       .inline _Atomic_add,2
       movl     %edi, %eax      // save add_value for return
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -58,6 +58,8 @@
   static void setup_fpu();
   static bool supports_sse() { return true; }
 
+  static jlong rdtsc();
+
   static bool      register_code_area(char *low, char *high);
 
 #endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+#define OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() {
+  // 32 bit: 64 bit result in edx:eax
+  // 64 bit: 64 bit value in rax
+  uint64_t res;
+  res = (uint64_t)__rdtsc();
+  return (jlong)res;
+}
+
+#endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -627,6 +627,7 @@
   if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
   if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
   if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
+  if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true;
 
   return false;
 }
@@ -3978,7 +3979,8 @@
     !strcmp(_opType,"MemBarAcquireLock") ||
     !strcmp(_opType,"MemBarReleaseLock") ||
     !strcmp(_opType,"MemBarVolatile" ) ||
-    !strcmp(_opType,"MemBarCPUOrder" ) ;
+    !strcmp(_opType,"MemBarCPUOrder" ) ||
+    !strcmp(_opType,"MemBarStoreStore" );
 }
 
 bool MatchRule::is_ideal_loadPC() const {
--- a/hotspot/src/share/vm/asm/assembler.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/asm/assembler.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -61,6 +61,7 @@
   _code_limit  = cs->limit();
   _code_pos    = cs->end();
   _oop_recorder= code->oop_recorder();
+  DEBUG_ONLY( _short_branch_delta = 0; )
   if (_code_begin == NULL)  {
     vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
                                      code->name()));
--- a/hotspot/src/share/vm/asm/assembler.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/asm/assembler.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -241,6 +241,33 @@
   // Make it return true on platforms which need to verify
   // instruction boundaries for some operations.
   inline static bool pd_check_instruction_mark();
+
+  // Add delta to short branch distance to verify that it still fit into imm8.
+  int _short_branch_delta;
+
+  int  short_branch_delta() const { return _short_branch_delta; }
+  void set_short_branch_delta()   { _short_branch_delta = 32; }
+  void clear_short_branch_delta() { _short_branch_delta = 0; }
+
+  class ShortBranchVerifier: public StackObj {
+   private:
+    AbstractAssembler* _assm;
+
+   public:
+    ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) {
+      assert(assm->short_branch_delta() == 0, "overlapping instructions");
+      _assm->set_short_branch_delta();
+    }
+    ~ShortBranchVerifier() {
+      _assm->clear_short_branch_delta();
+    }
+  };
+  #else
+  // Dummy in product.
+  class ShortBranchVerifier: public StackObj {
+   public:
+    ShortBranchVerifier(AbstractAssembler* assm) {}
+  };
   #endif
 
   // Label functions
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -854,6 +854,9 @@
       if (opTypeCheck->_info_for_exception)       do_info(opTypeCheck->_info_for_exception);
       if (opTypeCheck->_info_for_patch)           do_info(opTypeCheck->_info_for_patch);
       if (opTypeCheck->_object->is_valid())       do_input(opTypeCheck->_object);
+      if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) {
+        do_temp(opTypeCheck->_object);
+      }
       if (opTypeCheck->_array->is_valid())        do_input(opTypeCheck->_array);
       if (opTypeCheck->_tmp1->is_valid())         do_temp(opTypeCheck->_tmp1);
       if (opTypeCheck->_tmp2->is_valid())         do_temp(opTypeCheck->_tmp2);
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1256,8 +1256,7 @@
     info = state_for(x);
   }
   __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info);
-  __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() +
-                               klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result);
+  __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
 }
 
 
--- a/hotspot/src/share/vm/c1/c1_Optimizer.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/c1/c1_Optimizer.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -122,18 +122,32 @@
   if (sux != f_goto->default_sux()) return;
 
   // check if at least one word was pushed on sux_state
+  // inlining depths must match
+  ValueStack* if_state = if_->state();
   ValueStack* sux_state = sux->state();
-  if (sux_state->stack_size() <= if_->state()->stack_size()) return;
+  if (if_state->scope()->level() > sux_state->scope()->level()) {
+    while (sux_state->scope() != if_state->scope()) {
+      if_state = if_state->caller_state();
+      assert(if_state != NULL, "states do not match up");
+    }
+  } else if (if_state->scope()->level() < sux_state->scope()->level()) {
+    while (sux_state->scope() != if_state->scope()) {
+      sux_state = sux_state->caller_state();
+      assert(sux_state != NULL, "states do not match up");
+    }
+  }
+
+  if (sux_state->stack_size() <= if_state->stack_size()) return;
 
   // check if phi function is present at end of successor stack and that
   // only this phi was pushed on the stack
-  Value sux_phi = sux_state->stack_at(if_->state()->stack_size());
+  Value sux_phi = sux_state->stack_at(if_state->stack_size());
   if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return;
-  if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return;
+  if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return;
 
   // get the values that were pushed in the true- and false-branch
-  Value t_value = t_goto->state()->stack_at(if_->state()->stack_size());
-  Value f_value = f_goto->state()->stack_at(if_->state()->stack_size());
+  Value t_value = t_goto->state()->stack_at(if_state->stack_size());
+  Value f_value = f_goto->state()->stack_at(if_state->stack_size());
 
   // backend does not support floats
   assert(t_value->type()->base() == f_value->type()->base(), "incompatible types");
@@ -180,11 +194,7 @@
   Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint());
 
   // prepare state for Goto
-  ValueStack* goto_state = if_->state();
-  while (sux_state->scope() != goto_state->scope()) {
-    goto_state = goto_state->caller_state();
-    assert(goto_state != NULL, "states do not match up");
-  }
+  ValueStack* goto_state = if_state;
   goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci());
   goto_state->push(result->type(), result);
   assert(goto_state->is_same(sux_state), "states must match now");
--- a/hotspot/src/share/vm/ci/ciInstanceKlass.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciInstanceKlass.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -54,7 +54,7 @@
   _flags = ciFlags(access_flags);
   _has_finalizer = access_flags.has_finalizer();
   _has_subklass = ik->subklass() != NULL;
-  _init_state = (instanceKlass::ClassState)ik->get_init_state();
+  _init_state = ik->init_state();
   _nonstatic_field_size = ik->nonstatic_field_size();
   _has_nonstatic_fields = ik->has_nonstatic_fields();
   _nonstatic_fields = NULL; // initialized lazily by compute_nonstatic_fields:
@@ -118,7 +118,7 @@
 void ciInstanceKlass::compute_shared_init_state() {
   GUARDED_VM_ENTRY(
     instanceKlass* ik = get_instanceKlass();
-    _init_state = (instanceKlass::ClassState)ik->get_init_state();
+    _init_state = ik->init_state();
   )
 }
 
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1589,7 +1589,7 @@
   _next = NULL;
   _on_work_list = false;
   _backedge_copy = false;
-  _exception_entry = false;
+  _has_monitorenter = false;
   _trap_bci = -1;
   _trap_index = 0;
   df_init();
@@ -2182,6 +2182,10 @@
         !head->is_clonable_exit(lp))
       continue;
 
+    // Avoid BoxLock merge.
+    if (EliminateNestedLocks && head->has_monitorenter())
+      continue;
+
     // check not already cloned
     if (head->backedge_copy_count() != 0)
       continue;
@@ -2322,6 +2326,10 @@
     // Watch for bailouts.
     if (failing())  return;
 
+    if (str.cur_bc() == Bytecodes::_monitorenter) {
+      block->set_has_monitorenter();
+    }
+
     if (res) {
 
       // We have encountered a trap.  Record it in this block.
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -544,15 +544,19 @@
     // Has this block been cloned for a loop backedge?
     bool                             _backedge_copy;
 
+    // This block is entry to irreducible loop.
+    bool                             _irreducible_entry;
+
+    // This block has monitor entry point.
+    bool                             _has_monitorenter;
+
     // A pointer used for our internal work list
+    bool                             _on_work_list;      // on the work list
     Block*                           _next;
-    bool                             _on_work_list;      // on the work list
     Block*                           _rpo_next;          // Reverse post order list
 
     // Loop info
     Loop*                            _loop;              // nearest loop
-    bool                             _irreducible_entry; // entry to irreducible loop
-    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -689,6 +693,8 @@
     bool   is_loop_head() const          { return _loop && _loop->head() == this; }
     void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
     bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    void   set_has_monitorenter()        { _has_monitorenter = true; }
+    bool   has_monitorenter() const      { return _has_monitorenter; }
     bool   is_visited() const            { return has_pre_order(); }
     bool   is_post_visited() const       { return has_post_order(); }
     bool   is_clonable_exit(Loop* lp);
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -45,6 +45,7 @@
 #include "oops/methodOop.hpp"
 #include "oops/symbol.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/reflection.hpp"
@@ -1050,7 +1051,7 @@
 
 class FieldAllocationCount: public ResourceObj {
  public:
-  unsigned int count[MAX_FIELD_ALLOCATION_TYPE];
+  u2 count[MAX_FIELD_ALLOCATION_TYPE];
 
   FieldAllocationCount() {
     for (int i = 0; i < MAX_FIELD_ALLOCATION_TYPE; i++) {
@@ -1060,6 +1061,8 @@
 
   FieldAllocationType update(bool is_static, BasicType type) {
     FieldAllocationType atype = basic_type_to_atype(is_static, type);
+    // Make sure there is no overflow with injected fields.
+    assert(count[atype] < 0xFFFF, "More than 65535 fields");
     count[atype]++;
     return atype;
   }
@@ -1070,7 +1073,7 @@
                                               constantPoolHandle cp, bool is_interface,
                                               FieldAllocationCount *fac,
                                               objArrayHandle* fields_annotations,
-                                              int* java_fields_count_ptr, TRAPS) {
+                                              u2* java_fields_count_ptr, TRAPS) {
   ClassFileStream* cfs = stream();
   typeArrayHandle nullHandle;
   cfs->guarantee_more(2, CHECK_(nullHandle));  // length
@@ -2639,8 +2642,11 @@
                                                     TempNewSymbol& parsed_name,
                                                     bool verify,
                                                     TRAPS) {
-  // So that JVMTI can cache class file in the state before retransformable agents
-  // have modified it
+  // When a retransformable agent is attached, JVMTI caches the
+  // class bytes that existed before the first retransformation.
+  // If RedefineClasses() was used before the retransformable
+  // agent attached, then the cached class bytes may not be the
+  // original class bytes.
   unsigned char *cached_class_file_bytes = NULL;
   jint cached_class_file_length;
 
@@ -2660,6 +2666,25 @@
   _max_bootstrap_specifier_index = -1;
 
   if (JvmtiExport::should_post_class_file_load_hook()) {
+    // Get the cached class file bytes (if any) from the class that
+    // is being redefined or retransformed. We use jvmti_thread_state()
+    // instead of JvmtiThreadState::state_for(jt) so we don't allocate
+    // a JvmtiThreadState any earlier than necessary. This will help
+    // avoid the bug described by 7126851.
+    JvmtiThreadState *state = jt->jvmti_thread_state();
+    if (state != NULL) {
+      KlassHandle *h_class_being_redefined =
+                     state->get_class_being_redefined();
+      if (h_class_being_redefined != NULL) {
+        instanceKlassHandle ikh_class_being_redefined =
+          instanceKlassHandle(THREAD, (*h_class_being_redefined)());
+        cached_class_file_bytes =
+          ikh_class_being_redefined->get_cached_class_file_bytes();
+        cached_class_file_length =
+          ikh_class_being_redefined->get_cached_class_file_len();
+      }
+    }
+
     unsigned char* ptr = cfs->buffer();
     unsigned char* end_ptr = cfs->buffer() + cfs->length();
 
@@ -2843,7 +2868,7 @@
       local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, _class_name, CHECK_(nullHandle));
     }
 
-    int java_fields_count = 0;
+    u2 java_fields_count = 0;
     // Fields (offsets are filled in later)
     FieldAllocationCount fac;
     objArrayHandle fields_annotations;
--- a/hotspot/src/share/vm/classfile/classFileParser.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/classfile/classFileParser.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -91,7 +91,7 @@
                                constantPoolHandle cp, bool is_interface,
                                FieldAllocationCount *fac,
                                objArrayHandle* fields_annotations,
-                               int* java_fields_count_ptr, TRAPS);
+                               u2* java_fields_count_ptr, TRAPS);
 
   // Method parsing
   methodHandle parse_method(constantPoolHandle cp, bool is_interface,
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -296,6 +296,7 @@
   template(finalize_method_name,                      "finalize")                                 \
   template(reference_lock_name,                       "lock")                                     \
   template(reference_discovered_name,                 "discovered")                               \
+  template(run_finalization_name,                     "runFinalization")                          \
   template(run_finalizers_on_exit_name,               "runFinalizersOnExit")                      \
   template(uncaughtException_name,                    "uncaughtException")                        \
   template(dispatchUncaughtException_name,            "dispatchUncaughtException")                \
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2598,7 +2598,7 @@
 AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[]    =
   VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
 size_t CFLS_LAB::_global_num_blocks[]  = VECTOR_257(0);
-int    CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
+uint   CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
 
 CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
   _cfls(cfls)
@@ -2732,7 +2732,7 @@
         // Update globals stats for num_blocks used
         _global_num_blocks[i] += (_num_blocks[i] - num_retire);
         _global_num_workers[i]++;
-        assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+        assert(_global_num_workers[i] <= ParallelGCThreads, "Too big");
         if (num_retire > 0) {
           _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
           // Reset this list.
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -631,7 +631,7 @@
   static AdaptiveWeightedAverage
                  _blocks_to_claim  [CompactibleFreeListSpace::IndexSetSize];
   static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
-  static int    _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+  static uint   _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
   size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
 
   // Internal work method
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -3779,7 +3779,7 @@
     terminator()->reset_for_reuse(active_workers);
   }
 
-  void work(int i);
+  void work(uint worker_id);
   bool should_yield() {
     return    ConcurrentMarkSweepThread::should_yield()
            && !_collector->foregroundGCIsActive()
@@ -3852,7 +3852,7 @@
 //    . if neither is available, offer termination
 // -- Terminate and return result
 //
-void CMSConcMarkingTask::work(int i) {
+void CMSConcMarkingTask::work(uint worker_id) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark hm;
@@ -3860,37 +3860,40 @@
   DEBUG_ONLY(_collector->verify_overflow_empty();)
 
   // Before we begin work, our work queue should be empty
-  assert(work_queue(i)->size() == 0, "Expected to be empty");
+  assert(work_queue(worker_id)->size() == 0, "Expected to be empty");
   // Scan the bitmap covering _cms_space, tracing through grey objects.
   _timer.start();
-  do_scan_and_mark(i, _cms_space);
+  do_scan_and_mark(worker_id, _cms_space);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
 
   // ... do the same for the _perm_space
   _timer.reset();
   _timer.start();
-  do_scan_and_mark(i, _perm_space);
+  do_scan_and_mark(worker_id, _perm_space);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished perm space scanning in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
 
   // ... do work stealing
   _timer.reset();
   _timer.start();
-  do_work_steal(i);
+  do_work_steal(worker_id);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
   assert(_collector->_markStack.isEmpty(), "Should have been emptied");
-  assert(work_queue(i)->size() == 0, "Should have been emptied");
+  assert(work_queue(worker_id)->size() == 0, "Should have been emptied");
   // Note that under the current task protocol, the
   // following assertion is true even of the spaces
   // expanded since the completion of the concurrent
@@ -3946,7 +3949,7 @@
   // We allow that there may be no tasks to do here because
   // we are restarting after a stack overflow.
   assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
-  int nth_task = 0;
+  uint nth_task = 0;
 
   HeapWord* aligned_start = sp->bottom();
   if (sp->used_region().contains(_restart_addr)) {
@@ -5075,7 +5078,7 @@
   ParallelTaskTerminator* terminator() { return &_term; }
   int n_workers() { return _n_workers; }
 
-  void work(int i);
+  void work(uint worker_id);
 
  private:
   // Work method in support of parallel rescan ... of young gen spaces
@@ -5096,7 +5099,7 @@
 // also is passed to do_dirty_card_rescan_tasks() and to
 // do_work_steal() to select the i-th task_queue.
 
-void CMSParRemarkTask::work(int i) {
+void CMSParRemarkTask::work(uint worker_id) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark   hm;
@@ -5107,7 +5110,7 @@
   Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
     _collector->_span, _collector->ref_processor(),
     &(_collector->_markBitMap),
-    work_queue(i), &(_collector->_revisitStack));
+    work_queue(worker_id), &(_collector->_revisitStack));
 
   // Rescan young gen roots first since these are likely
   // coarsely partitioned and may, on that account, constitute
@@ -5128,15 +5131,15 @@
     assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
     assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
 
-    do_young_space_rescan(i, &par_mrias_cl, to_space, NULL, 0);
-    do_young_space_rescan(i, &par_mrias_cl, from_space, sca, sct);
-    do_young_space_rescan(i, &par_mrias_cl, eden_space, eca, ect);
+    do_young_space_rescan(worker_id, &par_mrias_cl, to_space, NULL, 0);
+    do_young_space_rescan(worker_id, &par_mrias_cl, from_space, sca, sct);
+    do_young_space_rescan(worker_id, &par_mrias_cl, eden_space, eca, ect);
 
     _timer.stop();
     if (PrintCMSStatistics != 0) {
       gclog_or_tty->print_cr(
         "Finished young gen rescan work in %dth thread: %3.3f sec",
-        i, _timer.seconds());
+        worker_id, _timer.seconds());
     }
   }
 
@@ -5158,7 +5161,7 @@
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished remaining root rescan work in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 
   // ---------- rescan dirty cards ------------
@@ -5167,26 +5170,26 @@
 
   // Do the rescan tasks for each of the two spaces
   // (cms_space and perm_space) in turn.
-  // "i" is passed to select the "i-th" task_queue
-  do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
-  do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
+  // "worker_id" is passed to select the task_queue for "worker_id"
+  do_dirty_card_rescan_tasks(_cms_space, worker_id, &par_mrias_cl);
+  do_dirty_card_rescan_tasks(_perm_space, worker_id, &par_mrias_cl);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished dirty card rescan work in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 
   // ---------- steal work from other threads ...
   // ---------- ... and drain overflow list.
   _timer.reset();
   _timer.start();
-  do_work_steal(i, &par_mrias_cl, _collector->hash_seed(i));
+  do_work_steal(worker_id, &par_mrias_cl, _collector->hash_seed(worker_id));
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished work stealing in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 }
 
@@ -5207,8 +5210,8 @@
   SequentialSubTasksDone* pst = space->par_seq_tasks();
   assert(pst->valid(), "Uninitialized use?");
 
-  int nth_task = 0;
-  int n_tasks  = pst->n_tasks();
+  uint nth_task = 0;
+  uint n_tasks  = pst->n_tasks();
 
   HeapWord *start, *end;
   while (!pst->is_task_claimed(/* reference */ nth_task)) {
@@ -5220,12 +5223,12 @@
     } else if (nth_task == 0) {
       start = space->bottom();
       end   = chunk_array[nth_task];
-    } else if (nth_task < (jint)chunk_top) {
+    } else if (nth_task < (uint)chunk_top) {
       assert(nth_task >= 1, "Control point invariant");
       start = chunk_array[nth_task - 1];
       end   = chunk_array[nth_task];
     } else {
-      assert(nth_task == (jint)chunk_top, "Control point invariant");
+      assert(nth_task == (uint)chunk_top, "Control point invariant");
       start = chunk_array[chunk_top - 1];
       end   = space->top();
     }
@@ -5288,7 +5291,7 @@
 
   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
   assert(pst->valid(), "Uninitialized use?");
-  int nth_task = 0;
+  uint nth_task = 0;
   const int alignment = CardTableModRefBS::card_size * BitsPerWord;
   MemRegion span = sp->used_region();
   HeapWord* start_addr = span.start();
@@ -5736,26 +5739,26 @@
                      CMSParKeepAliveClosure* keep_alive,
                      int* seed);
 
-  virtual void work(int i);
+  virtual void work(uint worker_id);
 };
 
-void CMSRefProcTaskProxy::work(int i) {
+void CMSRefProcTaskProxy::work(uint worker_id) {
   assert(_collector->_span.equals(_span), "Inconsistency in _span");
   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                         _mark_bit_map,
                                         &_collector->_revisitStack,
-                                        work_queue(i));
+                                        work_queue(worker_id));
   CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span,
                                                  _mark_bit_map,
                                                  &_collector->_revisitStack,
-                                                 work_queue(i));
+                                                 work_queue(worker_id));
   CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map);
-  _task.work(i, is_alive_closure, par_keep_alive, par_drain_stack);
+  _task.work(worker_id, is_alive_closure, par_keep_alive, par_drain_stack);
   if (_task.marks_oops_alive()) {
-    do_work_steal(i, &par_drain_stack, &par_keep_alive,
-                  _collector->hash_seed(i));
-  }
-  assert(work_queue(i)->size() == 0, "work_queue should be empty");
+    do_work_steal(worker_id, &par_drain_stack, &par_keep_alive,
+                  _collector->hash_seed(worker_id));
+  }
+  assert(work_queue(worker_id)->size() == 0, "work_queue should be empty");
   assert(_collector->_overflow_list == NULL, "non-empty _overflow_list");
 }
 
@@ -5769,9 +5772,9 @@
       _task(task)
   { }
 
-  virtual void work(int i)
+  virtual void work(uint worker_id)
   {
-    _task.work(i);
+    _task.work(worker_id);
   }
 };
 
--- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -264,7 +264,7 @@
     // or some improperly initialized variable with leads to no
     // active threads, protect against that in a product build.
     n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
-                     1);
+                     1U);
   }
   size_t max_waste = n_threads * chunkSize;
   // it should be aligned with respect to chunkSize
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
@@ -183,12 +184,11 @@
 void CMMarkStack::allocate(size_t size) {
   _base = NEW_C_HEAP_ARRAY(oop, size);
   if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate "
-                                  "CM region mark stack");
+    vm_exit_during_initialization("Failed to allocate CM region mark stack");
   }
   _index = 0;
   _capacity = (jint) size;
-  _oops_do_bound = -1;
+  _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
 }
 
@@ -283,7 +283,6 @@
   }
 }
 
-
 CMRegionStack::CMRegionStack() : _base(NULL) {}
 
 void CMRegionStack::allocate(size_t size) {
@@ -302,6 +301,8 @@
 }
 
 void CMRegionStack::push_lock_free(MemRegion mr) {
+  guarantee(false, "push_lock_free(): don't call this any more");
+
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
     jint index = _index;
@@ -325,6 +326,8 @@
 // marking / remark phases. Should only be called in tandem with
 // other lock-free pops.
 MemRegion CMRegionStack::pop_lock_free() {
+  guarantee(false, "pop_lock_free(): don't call this any more");
+
   while (true) {
     jint index = _index;
 
@@ -390,6 +393,8 @@
 #endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
+  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
   bool result = false;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   for (int i = 0; i < _oops_do_bound; ++i) {
@@ -438,14 +443,29 @@
   return res;
 }
 
+void CMMarkStack::note_start_of_gc() {
+  assert(_saved_index == -1,
+         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  _saved_index = _index;
+}
+
+void CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. we'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            err_msg("saved index: %d index: %d", _saved_index, _index));
+  _saved_index = -1;
+}
+
 void CMMarkStack::oops_do(OopClosure* f) {
-  if (_index == 0) return;
-  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
-         "Bound must be set.");
-  for (int i = 0; i < _oops_do_bound; i++) {
+  assert(_saved_index == _index,
+         err_msg("saved index: %d index: %d", _saved_index, _index));
+  for (int i = 0; i < _index; i += 1) {
     f->do_oop(&_base[i]);
   }
-  _oops_do_bound = -1;
 }
 
 bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -458,8 +478,8 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
-  return MAX2((n_par_threads + 2) / 4, (size_t)1);
+uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
@@ -486,7 +506,7 @@
   _regionStack(),
   // _finger set in set_non_marking_state
 
-  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
+  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
   // _active_tasks set in set_non_marking_state
   // _tasks set inside the constructor
   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
@@ -506,7 +526,6 @@
   _cleanup_times(),
   _total_counting_time(0.0),
   _total_rs_scrub_time(0.0),
-
   _parallel_workers(NULL) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
@@ -568,7 +587,7 @@
       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
       // if both are set
 
-      _parallel_marking_threads = ConcGCThreads;
+      _parallel_marking_threads = (uint) ConcGCThreads;
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
@@ -589,12 +608,12 @@
       double sleep_factor =
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
-      _parallel_marking_threads = (size_t) marking_thread_num;
+      _parallel_marking_threads = (uint) marking_thread_num;
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = sleep_factor;
       _marking_task_overhead    = marking_task_overhead;
     } else {
-      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
@@ -618,7 +637,7 @@
 
     guarantee(parallel_marking_threads() > 0, "peace of mind");
     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-         (int) _max_parallel_marking_threads, false, true);
+         _max_parallel_marking_threads, false, true);
     if (_parallel_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     } else {
@@ -691,7 +710,7 @@
   set_concurrent_marking_in_progress();
 }
 
-void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
+void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
   assert(active_tasks <= _max_task_num, "we should not have more");
 
   _active_tasks = active_tasks;
@@ -727,12 +746,8 @@
 }
 
 ConcurrentMark::~ConcurrentMark() {
-  for (int i = 0; i < (int) _max_task_num; ++i) {
-    delete _task_queues->queue(i);
-    delete _tasks[i];
-  }
-  delete _task_queues;
-  FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
+  // The ConcurrentMark instance is never freed.
+  ShouldNotReachHere();
 }
 
 // This closure is used to mark refs into the g1 generation
@@ -788,7 +803,7 @@
 public:
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
-      r->note_start_of_marking(true);
+      r->note_start_of_marking();
     }
     return false;
   }
@@ -809,6 +824,10 @@
 
   // Initialise marking structures. This has to be done in a STW phase.
   reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
 }
 
 
@@ -823,10 +842,6 @@
   // every remark and we'll eventually not need to cause one.
   force_overflow_stw()->init();
 
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-
   // Start Concurrent Marking weak-reference discovery.
   ReferenceProcessor* rp = g1h->ref_processor_cm();
   // enable ("weak") refs discovery
@@ -951,22 +966,9 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRoot(oop p) {
-  HeapWord* addr = (HeapWord*) p;
-  // We can't really check against _heap_start and _heap_end, since it
-  // is possible during an evacuation pause with piggy-backed
-  // initial-mark that the committed space is expanded during the
-  // pause without CM observing this change. So the assertions below
-  // is a bit conservative; but better than nothing.
-  assert(_g1h->g1_committed().contains(addr),
-         "address should be within the heap bounds");
-
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
-  }
-}
-
 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
   // The objects on the region have already been marked "in bulk" by
   // the caller. We only need to decide whether to push the region on
   // the region stack or not.
@@ -1012,6 +1014,8 @@
 }
 
 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
   // The object is not marked by the caller. We need to at least mark
   // it and maybe push in on the stack.
 
@@ -1048,7 +1052,7 @@
   ConcurrentMarkThread* _cmt;
 
 public:
-  void work(int worker_i) {
+  void work(uint worker_id) {
     assert(Thread::current()->is_ConcurrentGC_thread(),
            "this should only be done by a conc GC thread");
     ResourceMark rm;
@@ -1057,8 +1061,8 @@
 
     ConcurrentGCThread::stsJoin();
 
-    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
-    CMTask* the_task = _cm->task(worker_i);
+    assert(worker_id < _cm->active_tasks(), "invariant");
+    CMTask* the_task = _cm->task(worker_id);
     the_task->record_start_time();
     if (!_cm->has_aborted()) {
       do {
@@ -1076,7 +1080,7 @@
         double elapsed_time_sec = end_time_sec - start_time_sec;
         _cm->clear_has_overflown();
 
-        bool ret = _cm->do_yield_check(worker_i);
+        bool ret = _cm->do_yield_check(worker_id);
 
         jlong sleep_time_ms;
         if (!_cm->has_aborted() && the_task->has_aborted()) {
@@ -1105,7 +1109,7 @@
     ConcurrentGCThread::stsLeave();
 
     double end_vtime = os::elapsedVTime();
-    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
+    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
   }
 
   CMConcurrentMarkingTask(ConcurrentMark* cm,
@@ -1117,9 +1121,9 @@
 
 // Calculates the number of active workers for a concurrent
 // phase.
-size_t ConcurrentMark::calc_parallel_marking_threads() {
+uint ConcurrentMark::calc_parallel_marking_threads() {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    size_t n_conc_workers = 0;
+    uint n_conc_workers = 0;
     if (!UseDynamicNumberOfGCThreads ||
         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
          !ForceDynamicNumberOfGCThreads)) {
@@ -1159,7 +1163,7 @@
   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
     "Maximum number of marking threads exceeded");
 
-  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  uint active_workers = MAX2(1U, parallel_marking_threads());
 
   // Parallel task terminator is set in "set_phase()"
   set_phase(active_workers, true /* concurrent */);
@@ -1229,7 +1233,6 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
-
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1503,7 +1506,7 @@
 protected:
   G1CollectedHeap* _g1h;
   CMBitMap* _bm;
-  size_t _n_workers;
+  uint    _n_workers;
   size_t *_live_bytes;
   size_t *_used_bytes;
   BitMap* _region_bm;
@@ -1535,13 +1538,13 @@
     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
   }
 
-  void work(int i) {
+  void work(uint worker_id) {
     CalcLiveObjectsClosure calccl(true /*final*/,
                                   _bm, _g1h->concurrent_mark(),
                                   _region_bm, _card_bm);
     calccl.no_yield();
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&calccl, i,
+      _g1h->heap_region_par_iterate_chunked(&calccl, worker_id,
                                             (int) _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
@@ -1549,19 +1552,19 @@
     }
     assert(calccl.complete(), "Shouldn't have yielded!");
 
-    assert((size_t) i < _n_workers, "invariant");
-    _live_bytes[i] = calccl.tot_live();
-    _used_bytes[i] = calccl.tot_used();
+    assert(worker_id < _n_workers, "invariant");
+    _live_bytes[worker_id] = calccl.tot_live();
+    _used_bytes[worker_id] = calccl.tot_used();
   }
   size_t live_bytes()  {
     size_t live_bytes = 0;
-    for (size_t i = 0; i < _n_workers; ++i)
+    for (uint i = 0; i < _n_workers; ++i)
       live_bytes += _live_bytes[i];
     return live_bytes;
   }
   size_t used_bytes()  {
     size_t used_bytes = 0;
-    for (size_t i = 0; i < _n_workers; ++i)
+    for (uint i = 0; i < _n_workers; ++i)
       used_bytes += _used_bytes[i];
     return used_bytes;
   }
@@ -1646,18 +1649,18 @@
     AbstractGangTask("G1 note end"), _g1h(g1h),
     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     double start = os::elapsedTime();
     FreeRegionList local_cleanup_list("Local Cleanup List");
     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
     HRRSCleanupTask hrrs_cleanup_task;
-    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                             _g1h->workers()->active_workers(),
                                             HeapRegion::NoteEndClaimValue);
     } else {
@@ -1701,8 +1704,8 @@
     double end = os::elapsedTime();
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
-                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
-                          i, start, end, (end-start)*1000.0,
+                          "claimed %u regions (tot = %8.3f ms, max = %8.3f ms).\n",
+                          worker_id, start, end, (end-start)*1000.0,
                           g1_note_end.regions_claimed(),
                           g1_note_end.claimed_region_time_sec()*1000.0,
                           g1_note_end.max_region_time_sec()*1000.0);
@@ -1724,9 +1727,9 @@
     _region_bm(region_bm), _card_bm(card_bm)
   {}
 
-  void work(int i) {
+  void work(uint worker_id) {
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1rs->scrub_par(_region_bm, _card_bm, i,
+      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                        HeapRegion::ScrubRemSetClaimValue);
     } else {
       _g1rs->scrub(_region_bm, _card_bm);
@@ -1766,7 +1769,7 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  size_t n_workers;
+  uint n_workers;
 
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
@@ -1778,7 +1781,7 @@
 
     g1h->set_par_threads();
     n_workers = g1h->n_par_threads();
-    assert(g1h->n_par_threads() == (int) n_workers,
+    assert(g1h->n_par_threads() == n_workers,
            "Should not have been reset");
     g1h->workers()->run_task(&g1_par_count_task);
     // Done with the parallel phase so reset to 0.
@@ -1884,10 +1887,6 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  // G1CollectedHeap::heap()->print();
-  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
-  // G1CollectedHeap::heap()->get_gc_time_stamp());
-
   if (PrintGC || PrintGCDetails) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
@@ -2169,13 +2168,13 @@
     AbstractGangTask("Process reference objects in parallel"),
     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
 
-  virtual void work(int i) {
-    CMTask* marking_task = _cm->task(i);
+  virtual void work(uint worker_id) {
+    CMTask* marking_task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
 
-    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
   }
 };
 
@@ -2201,8 +2200,8 @@
     AbstractGangTask("Enqueue reference objects in parallel"),
     _enq_task(enq_task) { }
 
-  virtual void work(int i) {
-    _enq_task.work(i);
+  virtual void work(uint worker_id) {
+    _enq_task.work(worker_id);
   }
 };
 
@@ -2249,8 +2248,8 @@
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
-    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
-    active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
+    uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
+    active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
 
     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
@@ -2314,11 +2313,11 @@
   ConcurrentMark *_cm;
 
 public:
-  void work(int worker_i) {
+  void work(uint worker_id) {
     // Since all available tasks are actually started, we should
     // only proceed if we're supposed to be actived.
-    if ((size_t)worker_i < _cm->active_tasks()) {
-      CMTask* task = _cm->task(worker_i);
+    if (worker_id < _cm->active_tasks()) {
+      CMTask* task = _cm->task(worker_id);
       task->record_start_time();
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
@@ -2347,10 +2346,10 @@
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all active threads
-    int active_workers = g1h->workers()->active_workers();
+    uint active_workers = g1h->workers()->active_workers();
     if (active_workers == 0) {
       assert(active_workers > 0, "Should have been set earlier");
-      active_workers = ParallelGCThreads;
+      active_workers = (uint) ParallelGCThreads;
       g1h->workers()->set_active_workers(active_workers);
     }
     set_phase(active_workers, false /* concurrent */);
@@ -2366,7 +2365,7 @@
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
-    int active_workers = 1;
+    uint active_workers = 1;
     set_phase(active_workers, false /* concurrent */);
 
     CMRemarkTask remarkTask(this, active_workers);
@@ -2674,6 +2673,8 @@
 }
 
 void ConcurrentMark::drainAllSATBBuffers() {
+  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
   CMGlobalObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.set_closure(&oc);
@@ -2692,12 +2693,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::markPrev(oop p) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
 void ConcurrentMark::clear(oop p) {
   assert(p != NULL && p->is_oop(), "expected an oop");
   HeapWord* addr = (HeapWord*)p;
@@ -2707,13 +2702,21 @@
   _nextMarkBitMap->clear(addr);
 }
 
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
   _nextMarkBitMap->clearRange(mr);
 }
 
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+  clearRangePrevBitmap(mr);
+  clearRangeNextBitmap(mr);
+}
+
 HeapRegion*
 ConcurrentMark::claim_region(int task_num) {
   // "checkpoint" the finger
@@ -2808,6 +2811,9 @@
 }
 
 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  guarantee(false, "invalidate_aborted_regions_in_cset(): "
+                   "don't call this any more");
+
   bool result = false;
   for (int i = 0; i < (int)_max_task_num; ++i) {
     CMTask* the_task = _tasks[i];
@@ -2859,24 +2865,135 @@
     // ...then over the contents of the all the task queues.
     queue->oops_do(cl);
   }
-
-  // Invalidate any entries, that are in the region stack, that
-  // point into the collection set
-  if (_regionStack.invalidate_entries_into_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+}
+
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+  VerifyNoCSetOopsStack,
+  VerifyNoCSetOopsQueues,
+  VerifyNoCSetOopsSATBCompleted,
+  VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;
+  int _info;
+
+  const char* phase_str() {
+    switch (_phase) {
+    case VerifyNoCSetOopsStack:         return "Stack";
+    case VerifyNoCSetOopsQueues:        return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
+    default:                            ShouldNotReachHere();
+    }
+    return NULL;
+  }
+
+  void do_object_work(oop obj) {
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
+  }
+
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return;
   }
 
-  // Invalidate any aborted regions, recorded in the individual CM
-  // tasks, that point into the collection set.
-  if (invalidate_aborted_regions_in_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i);
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
   }
 
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes continues humongous. If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding starts humongous region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
 }
+#endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
@@ -2921,7 +3038,7 @@
   int              _ms_size;
   int              _ms_ind;
   int              _array_increment;
-  int              _worker_i;
+  uint             _worker_id;
 
   bool push(oop obj, int arr_ind = 0) {
     if (_ms_ind == _ms_size) {
@@ -2971,7 +3088,7 @@
   }
 
 public:
-  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
     _g1h(G1CollectedHeap::heap()),
     _cm(cm),
     _bm(cm->nextMarkBitMap()),
@@ -2979,7 +3096,7 @@
     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
     _array_increment(MAX2(ms_size/8, 16)),
-    _worker_i(worker_i) { }
+    _worker_id(worker_id) { }
 
   ~CSetMarkOopClosure() {
     FREE_C_HEAP_ARRAY(oop, _ms);
@@ -3024,14 +3141,14 @@
   CMBitMap*          _bitMap;
   ConcurrentMark*    _cm;
   CSetMarkOopClosure _oop_cl;
-  int                _worker_i;
+  uint               _worker_id;
 
 public:
-  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_id) :
     _g1h(G1CollectedHeap::heap()),
     _bitMap(cm->nextMarkBitMap()),
-    _oop_cl(cm, ms_size, worker_i),
-    _worker_i(worker_i) { }
+    _oop_cl(cm, ms_size, worker_id),
+    _worker_id(worker_id) { }
 
   bool do_bit(size_t offset) {
     // convert offset into a HeapWord*
@@ -3056,17 +3173,17 @@
 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
   CMBitMap*             _bm;
   CSetMarkBitMapClosure _bit_cl;
-  int                   _worker_i;
+  uint                  _worker_id;
 
   enum SomePrivateConstants {
     MSSize = 1000
   };
 
 public:
-  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
+  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_id) :
     _bm(cm->nextMarkBitMap()),
-    _bit_cl(cm, MSSize, worker_i),
-    _worker_i(worker_i) { }
+    _bit_cl(cm, MSSize, worker_id),
+    _worker_id(worker_id) { }
 
   bool doHeapRegion(HeapRegion* hr) {
     if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
@@ -3085,19 +3202,6 @@
   }
 };
 
-class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
-  jint _claim_value;
-
-public:
-  SetClaimValuesInCSetHRClosure(jint claim_value) :
-    _claim_value(claim_value) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    hr->set_claim_value(_claim_value);
-    return false;
-  }
-};
-
 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
@@ -3109,14 +3213,17 @@
     AbstractGangTask("Complete Mark in CSet"),
     _g1h(g1h), _cm(cm) { }
 
-  void work(int worker_i) {
-    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
-    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
+  void work(uint worker_id) {
+    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_id);
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
     _g1h->collection_set_iterate_from(hr, &cmplt);
   }
 };
 
 void ConcurrentMark::complete_marking_in_collection_set() {
+  guarantee(false, "complete_marking_in_collection_set(): "
+                   "don't call this any more");
+
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
   if (!g1h->mark_in_progress()) {
@@ -3140,9 +3247,8 @@
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
 
-  // Now reset the claim values in the regions in the collection set.
-  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
-  g1h->collection_set_iterate(&set_cv_cl);
+  // Reset the claim values in the regions in the collection set.
+  g1h->reset_cset_heap_region_claim_values();
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
 
@@ -3165,6 +3271,8 @@
 // newCSet().
 
 void ConcurrentMark::newCSet() {
+  guarantee(false, "newCSet(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) {
     // nothing to do if marking is not in progress
     return;
@@ -3203,6 +3311,8 @@
 }
 
 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  guarantee(false, "registerCSetRegion(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) return;
 
   HeapWord* region_end = hr->end();
@@ -3214,6 +3324,9 @@
 // Resets the region fields of active CMTasks whose values point
 // into the collection set.
 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+  guarantee(false, "reset_active_task_region_fields_in_cset(): "
+                   "don't call this any more");
+
   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
   assert(parallel_marking_threads() <= _max_task_num, "sanity");
 
@@ -3307,13 +3420,13 @@
 // the CMS bit map. Called at the first checkpoint.
 
 // We take a break if someone is trying to stop the world.
-bool ConcurrentMark::do_yield_check(int worker_i) {
+bool ConcurrentMark::do_yield_check(uint worker_id) {
   if (should_yield()) {
-    if (worker_i == 0) {
+    if (worker_id == 0) {
       _g1h->g1_policy()->record_concurrent_pause();
     }
     cmThread()->yield();
-    if (worker_i == 0) {
+    if (worker_id == 0) {
       _g1h->g1_policy()->record_concurrent_pause_end();
     }
     return true;
@@ -3924,6 +4037,10 @@
 }
 
 void CMTask::drain_region_stack(BitMapClosure* bc) {
+  assert(_cm->region_stack_empty(), "region stack should be empty");
+  assert(_aborted_region.is_empty(), "aborted region should be empty");
+  return;
+
   if (has_aborted()) return;
 
   assert(_region_finger == NULL,
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,10 +166,10 @@
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
-  oop*   _base;      // bottom of stack
-  jint   _index;     // one more than last occupied index
-  jint   _capacity;  // max #elements
-  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  oop*   _base;        // bottom of stack
+  jint   _index;       // one more than last occupied index
+  jint   _capacity;    // max #elements
+  jint   _saved_index; // value of _index saved at start of GC
   NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
 
   bool   _overflow;
@@ -247,16 +247,12 @@
 
   void setEmpty()   { _index = 0; clear_overflow(); }
 
-  // Record the current size; a subsequent "oops_do" will iterate only over
-  // indices valid at the time of this call.
-  void set_oops_do_bound(jint bound = -1) {
-    if (bound == -1) {
-      _oops_do_bound = _index;
-    } else {
-      _oops_do_bound = bound;
-    }
-  }
-  jint oops_do_bound() { return _oops_do_bound; }
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
   // iterate over the oops in the mark stack, up to the bound recorded via
   // the call above.
   void oops_do(OopClosure* f);
@@ -374,9 +370,9 @@
 protected:
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
   G1CollectedHeap*      _g1h;        // the heap.
-  size_t                _parallel_marking_threads; // the number of marking
+  uint                  _parallel_marking_threads; // the number of marking
                                                    // threads we're use
-  size_t                _max_parallel_marking_threads; // max number of marking
+  uint                  _max_parallel_marking_threads; // max number of marking
                                                    // threads we'll ever use
   double                _sleep_factor; // how much we have to sleep, with
                                        // respect to the work we just did, to
@@ -412,8 +408,8 @@
                                     // last claimed region
 
   // marking tasks
-  size_t                  _max_task_num; // maximum task number
-  size_t                  _active_tasks; // task num currently active
+  uint                    _max_task_num; // maximum task number
+  uint                    _active_tasks; // task num currently active
   CMTask**                _tasks;        // task queue array (max_task_num len)
   CMTaskQueueSet*         _task_queues;  // task queue set
   ParallelTaskTerminator  _terminator;   // for termination
@@ -492,7 +488,7 @@
 
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
-  void set_phase(size_t active_tasks, bool concurrent);
+  void set_phase(uint active_tasks, bool concurrent);
   // We do this after we're done with marking so that the marking data
   // structures are initialised to a sensible and predictable state.
   void set_non_marking_state();
@@ -505,8 +501,8 @@
   }
 
   // accessor methods
-  size_t parallel_marking_threads() { return _parallel_marking_threads; }
-  size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
+  uint parallel_marking_threads() { return _parallel_marking_threads; }
+  uint max_parallel_marking_threads() { return _max_parallel_marking_threads;}
   double sleep_factor()             { return _sleep_factor; }
   double marking_task_overhead()    { return _marking_task_overhead;}
   double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
@@ -514,7 +510,7 @@
 
   HeapWord*               finger()        { return _finger;   }
   bool                    concurrent()    { return _concurrent; }
-  size_t                  active_tasks()  { return _active_tasks; }
+  uint                    active_tasks()  { return _active_tasks; }
   ParallelTaskTerminator* terminator()    { return &_terminator; }
 
   // It claims the next available region to be scanned by a marking
@@ -715,19 +711,18 @@
   // Returns the number of GC threads to be used in a concurrent
   // phase based on the number of GC threads being used in a STW
   // phase.
-  size_t scale_parallel_threads(size_t n_par_threads);
+  uint scale_parallel_threads(uint n_par_threads);
 
   // Calculates the number of GC threads to be used in a concurrent phase.
-  size_t calc_parallel_marking_threads();
+  uint calc_parallel_marking_threads();
 
   // The following three are interaction between CM and
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed and it's MT-safe. Currently, we just mark it. But, in the
-  // future, we can experiment with pushing it on the stack and we can
-  // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  // grayed. It is MT-safe.
+  inline void grayRoot(oop obj, size_t word_size);
+
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
@@ -735,6 +730,7 @@
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
+
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
@@ -791,24 +787,40 @@
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
-  void markPrev(oop p);
+  inline void markPrev(oop p);
+  inline void markNext(oop p);
   void clear(oop p);
-  // Clears marks for all objects in the given range, for both prev and
-  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
-  // this carefully!
-  void clearRangeBothMaps(MemRegion mr);
+  // Clears marks for all objects in the given range, for the prev,
+  // next, or both bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+  void clearRangeNextBitmap(MemRegion mr);
+  void clearRangeBothBitmaps(MemRegion mr);
 
-  // Record the current top of the mark and region stacks; a
-  // subsequent oops_do() on the mark stack and
-  // invalidate_entries_into_cset() on the region stack will iterate
-  // only over indices valid at the time of this call.
-  void set_oops_do_bound() {
-    _markStack.set_oops_do_bound();
-    _regionStack.set_oops_do_bound();
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
   }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
   // Iterate over the oops in the mark stack and all local queues. It
   // also calls invalidate_entries_into_cset() on the region stack.
   void oops_do(OopClosure* f);
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack), enqueued SATB buffers, per-thread SATB
+  // buffers, and fingers (global / per-task). The boolean parameters
+  // decide which of the above data structures to verify. If marking
+  // is not in progress, it's a no-op.
+  void verify_no_cset_oops(bool verify_stacks,
+                           bool verify_enqueued_buffers,
+                           bool verify_thread_buffers,
+                           bool verify_fingers) PRODUCT_RETURN;
+
   // It is called at the end of an evacuation pause during marking so
   // that CM is notified of where the new end of the heap is. It
   // doesn't do anything if concurrent_marking_in_progress() is false,
@@ -873,7 +885,7 @@
     return _prevMarkBitMap->isMarked(addr);
   }
 
-  inline bool do_yield_check(int worker_i = 0);
+  inline bool do_yield_check(uint worker_i = 0);
   inline bool should_yield();
 
   // Called to abort the marking cycle after a Full GC takes palce.
@@ -1166,6 +1178,7 @@
   // It keeps picking SATB buffers and processing them until no SATB
   // buffers are available.
   void drain_satb_buffers();
+
   // It keeps popping regions from the region stack and processing
   // them until the region stack is empty.
   void drain_region_stack(BitMapClosure* closure);
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,4 +153,46 @@
   }
 }
 
+inline void ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::markNext(oop p) {
+  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  _nextMarkBitMap->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+  HeapWord* addr = (HeapWord*) obj;
+
+  // Currently we don't do anything with word_size but we will use it
+  // in the very near future in the liveness calculation piggy-backing
+  // changes.
+
+#ifdef ASSERT
+  HeapRegion* hr = _g1h->heap_region_containing(addr);
+  assert(hr != NULL, "sanity");
+  assert(!hr->is_survivor(), "should not allocate survivors during IM");
+  assert(addr < hr->next_top_at_mark_start(),
+         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
+                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+  // We cannot assert that word_size == obj->size() given that obj
+  // might not be in a consistent state (another thread might be in
+  // the process of copying it). So the best thing we can do is to
+  // assert that word_size is under an upper bound which is its
+  // containing region's capacity.
+  assert(word_size * HeapWordSize <= hr->capacity(),
+         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+                 word_size * HeapWordSize, hr->capacity(),
+                 HR_FORMAT_PARAMS(hr)));
+#endif // ASSERT
+
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    _nextMarkBitMap->parMark(addr);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,11 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1EvacFailure.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -591,17 +593,29 @@
     }
     res = new_region_try_secondary_free_list();
   }
-  if (res == NULL && do_expand) {
+  if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
+    // Currently, only attempts to allocate GC alloc regions set
+    // do_expand to true. So, we should only reach here during a
+    // safepoint. If this assumption changes we might have to
+    // reconsider the use of _expand_heap_after_alloc_failure.
+    assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+
     ergo_verbose1(ErgoHeapSizing,
                   "attempt heap expansion",
                   ergo_format_reason("region allocation request failed")
                   ergo_format_byte("allocation request"),
                   word_size * HeapWordSize);
     if (expand(word_size * HeapWordSize)) {
-      // Even though the heap was expanded, it might not have reached
-      // the desired size. So, we cannot assume that the allocation
-      // will succeed.
+      // Given that expand() succeeded in expanding the heap, and we
+      // always expand the heap by an amount aligned to the heap
+      // region size, the free list should in theory not be empty. So
+      // it would probably be OK to use remove_head(). But the extra
+      // check for NULL is unlikely to be a performance issue here (we
+      // just expanded the heap!) so let's just be conservative and
+      // use remove_head_or_null().
       res = _free_list.remove_head_or_null();
+    } else {
+      _expand_heap_after_alloc_failure = false;
     }
   }
   return res;
@@ -1165,9 +1179,9 @@
       _g1(g1)
   { }
 
-  void work(int i) {
-    RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
-    _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+  void work(uint worker_id) {
+    RebuildRSOutOfRegionClosure rebuild_rs(_g1, worker_id);
+    _g1->heap_region_par_iterate_chunked(&rebuild_rs, worker_id,
                                           _g1->workers()->active_workers(),
                                          HeapRegion::RebuildRSClaimValue);
   }
@@ -1374,7 +1388,7 @@
 
     // Rebuild remembered sets of all regions.
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      int n_workers =
+      uint n_workers =
         AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                        workers()->active_workers(),
                                        Threads::number_of_non_daemon_threads());
@@ -1838,6 +1852,7 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _full_collections_completed(0),
   _in_cset_fast_test(NULL),
@@ -2519,11 +2534,11 @@
 
 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
-                                                 int worker,
-                                                 int no_of_par_workers,
+                                                 uint worker,
+                                                 uint no_of_par_workers,
                                                  jint claim_value) {
   const size_t regions = n_regions();
-  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+  const uint max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                              no_of_par_workers :
                              1);
   assert(UseDynamicNumberOfGCThreads ||
@@ -2605,12 +2620,16 @@
   }
 };
 
-void
-G1CollectedHeap::reset_heap_region_claim_values() {
+void G1CollectedHeap::reset_heap_region_claim_values() {
   ResetClaimValuesClosure blk;
   heap_region_iterate(&blk);
 }
 
+void G1CollectedHeap::reset_cset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  collection_set_iterate(&blk);
+}
+
 #ifdef ASSERT
 // This checks whether all regions in the heap have the correct claim
 // value. I also piggy-backed on this a check to ensure that the
@@ -2739,7 +2758,7 @@
   result = g1_policy()->collection_set();
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     size_t cs_size = g1_policy()->cset_region_length();
-    int active_workers = workers()->active_workers();
+    uint active_workers = workers()->active_workers();
     assert(UseDynamicNumberOfGCThreads ||
              active_workers == workers()->total_workers(),
              "Unless dynamic should use total workers");
@@ -3000,14 +3019,20 @@
       } else {
         VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
         r->object_iterate(&not_dead_yet_cl);
-        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
-          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
-                                 "max_live_bytes "SIZE_FORMAT" "
-                                 "< calculated "SIZE_FORMAT,
-                                 r->bottom(), r->end(),
-                                 r->max_live_bytes(),
+        if (_vo != VerifyOption_G1UseNextMarking) {
+          if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+            gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                   "max_live_bytes "SIZE_FORMAT" "
+                                   "< calculated "SIZE_FORMAT,
+                                   r->bottom(), r->end(),
+                                   r->max_live_bytes(),
                                  not_dead_yet_cl.live_bytes());
-          _failures = true;
+            _failures = true;
+          }
+        } else {
+          // When vo == UseNextMarking we cannot currently do a sanity
+          // check on the live bytes as the calculation has not been
+          // finalized yet.
         }
       }
     }
@@ -3075,10 +3100,10 @@
     return _failures;
   }
 
-  void work(int worker_i) {
+  void work(uint worker_id) {
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _vo);
-    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+    _g1h->heap_region_par_iterate_chunked(&blk, worker_id,
                                           _g1h->workers()->active_workers(),
                                           HeapRegion::ParVerifyClaimValue);
     if (blk.failures()) {
@@ -3641,25 +3666,6 @@
         }
         perm_gen()->save_marks();
 
-        // We must do this before any possible evacuation that should propagate
-        // marks.
-        if (mark_in_progress()) {
-          double start_time_sec = os::elapsedTime();
-
-          _cm->drainAllSATBBuffers();
-          double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-          g1_policy()->record_satb_drain_time(finish_mark_ms);
-        }
-        // Record the number of elements currently on the mark stack, so we
-        // only iterate over these.  (Since evacuation may add to the mark
-        // stack, doing more exposes race conditions.)  If no mark is in
-        // progress, this will be zero.
-        _cm->set_oops_do_bound();
-
-        if (mark_in_progress()) {
-          concurrent_mark()->newCSet();
-        }
-
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
         _young_list->print();
@@ -3668,6 +3674,16 @@
 
         g1_policy()->choose_collection_set(target_pause_time_ms);
 
+        _cm->note_start_of_gc();
+        // We should not verify the per-thread SATB buffers given that
+        // we have not filtered them yet (we'll do so during the
+        // GC). We also call this after choose_collection_set() to
+        // ensure that the CSet has been finalized.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 false /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         if (_hr_printer.is_active()) {
           HeapRegion* hr = g1_policy()->collection_set();
           while (hr != NULL) {
@@ -3684,16 +3700,6 @@
           }
         }
 
-        // We have chosen the complete collection set. If marking is
-        // active then, we clear the region fields of any of the
-        // concurrent marking tasks whose region fields point into
-        // the collection set as these values will become stale. This
-        // will cause the owning marking threads to claim a new region
-        // when marking restarts.
-        if (mark_in_progress()) {
-          concurrent_mark()->reset_active_task_region_fields_in_cset();
-        }
-
 #ifdef ASSERT
         VerifyCSetClosure cl;
         collection_set_iterate(&cl);
@@ -3707,6 +3713,16 @@
         // Actually do the work...
         evacuate_collection_set();
 
+        // We do this to mainly verify the per-thread SATB buffers
+        // (which have been filtered by now) since we didn't verify
+        // them earlier. No point in re-checking the stacks / enqueued
+        // buffers given that the CSet has not changed since last time
+        // we checked.
+        _cm->verify_no_cset_oops(false /* verify_stacks */,
+                                 false /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         free_collection_set(g1_policy()->collection_set());
         g1_policy()->clear_collection_set();
 
@@ -3775,6 +3791,8 @@
           size_t expand_bytes = g1_policy()->expansion_amount();
           if (expand_bytes > 0) {
             size_t bytes_before = capacity();
+            // No need for an ergo verbose message here,
+            // expansion_amount() does this when it returns a value > 0.
             if (!expand(expand_bytes)) {
               // We failed to expand the heap so let's verify that
               // committed/uncommitted amount match the backing store
@@ -3784,6 +3802,14 @@
           }
         }
 
+        // We redo the verificaiton but now wrt to the new CSet which
+        // has just got initialized after the previous CSet was freed.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+        _cm->note_end_of_gc();
+
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3831,21 +3857,6 @@
         // CM reference discovery will be re-enabled if necessary.
       }
 
-      {
-        size_t expand_bytes = g1_policy()->expansion_amount();
-        if (expand_bytes > 0) {
-          size_t bytes_before = capacity();
-          // No need for an ergo verbose message here,
-          // expansion_amount() does this when it returns a value > 0.
-          if (!expand(expand_bytes)) {
-            // We failed to expand the heap so let's verify that
-            // committed/uncommitted amount match the backing store
-            assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
-            assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
-          }
-        }
-      }
-
       // We should do this after we potentially expand the heap so
       // that all the COMMIT events are generated before the end GC
       // event, and after we retire the GC alloc regions so that all
@@ -3949,6 +3960,8 @@
     // we allocate to in the region sets. We'll re-add it later, when
     // it's retired again.
     _old_set.remove(retained_region);
+    bool during_im = g1_policy()->during_initial_mark_pause();
+    retained_region->note_start_of_copying(during_im);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -3985,157 +3998,26 @@
   _evac_failure_scan_stack = NULL;
 }
 
-class UpdateRSetDeferred : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  DirtyCardQueue *_dcq;
-  CardTableModRefBS* _ct_bs;
-
-public:
-  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
-        !_from->is_survivor()) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
-  }
-};
-
-class RemoveSelfPointerClosure: public ObjectClosure {
-private:
-  G1CollectedHeap* _g1;
-  ConcurrentMark* _cm;
-  HeapRegion* _hr;
-  size_t _prev_marked_bytes;
-  size_t _next_marked_bytes;
-  OopsInHeapRegionClosure *_cl;
-public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr,
-                           OopsInHeapRegionClosure* cl) :
-    _g1(g1), _hr(hr), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
-    _next_marked_bytes(0), _cl(cl) {}
-
-  size_t prev_marked_bytes() { return _prev_marked_bytes; }
-  size_t next_marked_bytes() { return _next_marked_bytes; }
-
-  // <original comment>
-  // The original idea here was to coalesce evacuated and dead objects.
-  // However that caused complications with the block offset table (BOT).
-  // In particular if there were two TLABs, one of them partially refined.
-  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
-  // The BOT entries of the unrefined part of TLAB_2 point to the start
-  // of TLAB_2. If the last object of the TLAB_1 and the first object
-  // of TLAB_2 are coalesced, then the cards of the unrefined part
-  // would point into middle of the filler object.
-  // The current approach is to not coalesce and leave the BOT contents intact.
-  // </original comment>
-  //
-  // We now reset the BOT when we start the object iteration over the
-  // region and refine its entries for every object we come across. So
-  // the above comment is not really relevant and we should be able
-  // to coalesce dead objects if we want to.
-  void do_object(oop obj) {
-    HeapWord* obj_addr = (HeapWord*) obj;
-    assert(_hr->is_in(obj_addr), "sanity");
-    size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
-    if (obj->is_forwarded() && obj->forwardee() == obj) {
-      // The object failed to move.
-      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
-      _cm->markPrev(obj);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-      _prev_marked_bytes += (obj_size * HeapWordSize);
-      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-      obj->set_mark(markOopDesc::prototype());
-      // While we were processing RSet buffers during the
-      // collection, we actually didn't scan any cards on the
-      // collection set, since we didn't want to update remebered
-      // sets with entries that point into the collection set, given
-      // that live objects fromthe collection set are about to move
-      // and such entries will be stale very soon. This change also
-      // dealt with a reliability issue which involved scanning a
-      // card in the collection set and coming across an array that
-      // was being chunked and looking malformed. The problem is
-      // that, if evacuation fails, we might have remembered set
-      // entries missing given that we skipped cards on the
-      // collection set. So, we'll recreate such entries now.
-      obj->oop_iterate(_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-    } else {
-      // The object has been either evacuated or is dead. Fill it with a
-      // dummy object.
-      MemRegion mr((HeapWord*)obj, obj_size);
-      CollectedHeap::fill_with_object(mr);
-      _cm->clearRangeBothMaps(mr);
-    }
-  }
-};
-
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
-  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
-  UpdateRSetDeferred deferred_update(_g1h, &dcq);
-  OopsInHeapRegionClosure *cl;
-  if (G1DeferredRSUpdate) {
-    cl = &deferred_update;
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    set_par_threads();
+    workers()->run_task(&rsfp_task);
+    set_par_threads(0);
   } else {
-    cl = &immediate_update;
-  }
-  HeapRegion* cur = g1_policy()->collection_set();
-  while (cur != NULL) {
-    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
-    assert(!cur->isHumongous(), "sanity");
-
-    if (cur->evacuation_failed()) {
-      assert(cur->in_collection_set(), "bad CS");
-      RemoveSelfPointerClosure rspc(_g1h, cur, cl);
-
-      // In the common case we make sure that this is done when the
-      // region is freed so that it is "ready-to-go" when it's
-      // re-allocated. However, when evacuation failure happens, a
-      // region will remain in the heap and might ultimately be added
-      // to a CSet in the future. So we have to be careful here and
-      // make sure the region's RSet is ready for parallel iteration
-      // whenever this might be required in the future.
-      cur->rem_set()->reset_for_par_iteration();
-      cur->reset_bot();
-      cl->set_region(cur);
-      cur->object_iterate(&rspc);
-
-      // A number of manipulations to make the TAMS be the current top,
-      // and the marked bytes be the ones observed in the iteration.
-      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
-        // The comments below are the postconditions achieved by the
-        // calls.  Note especially the last such condition, which says that
-        // the count of marked bytes has been properly restored.
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
-        // _next_marked_bytes == prev_marked_bytes.
-        cur->note_end_of_marking();
-        // _prev_top_at_mark_start == top(),
-        // _prev_marked_bytes == prev_marked_bytes
-      }
-      // If there is no mark in progress, we modified the _next variables
-      // above needlessly, but harmlessly.
-      if (_g1h->mark_in_progress()) {
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        // _next_marked_bytes == next_marked_bytes.
-      }
-    }
-    cur = cur->next_in_collection_set();
-  }
+    rsfp_task.work(0);
+  }
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::ParEvacFailureClaimValue), "sanity");
+
+  // Reset the claim values in the regions in the collection set.
+  reset_cset_heap_region_claim_values();
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   // Now restore saved marks, if any.
@@ -4148,6 +4030,7 @@
       markOop m = _preserved_marks_of_objs->at(i);
       obj->set_mark(m);
     }
+
     // Delete the preserved marks growable arrays (allocated on the C heap).
     delete _objs_with_preserved_marks;
     delete _preserved_marks_of_objs;
@@ -4172,8 +4055,7 @@
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
-                                               oop old,
-                                               bool should_mark_root) {
+                                               oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
@@ -4182,15 +4064,6 @@
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
 
-    // should_mark_root will be true when this routine is called
-    // from a root scanning closure during an initial mark pause.
-    // In this case the thread that succeeds in self-forwarding the
-    // object is also responsible for marking the object.
-    if (should_mark_root) {
-      assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
-    }
-
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4286,30 +4159,8 @@
   return NULL;
 }
 
-#ifndef PRODUCT
-bool GCLabBitMapClosure::do_bit(size_t offset) {
-  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
-  guarantee(_cm->isMarked(oop(addr)), "it should be!");
-  return true;
-}
-#endif // PRODUCT
-
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size),
-  _should_mark_objects(false),
-  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-  _retired(false)
-{
-  //_should_mark_objects is set to true when G1ParCopyHelper needs to
-  // mark the forwarded location of an evacuated object.
-  // We set _should_mark_objects to true if marking is active, i.e. when we
-  // need to propagate a mark, or during an initial mark pause, i.e. when we
-  // need to mark objects immediately reachable by the roots.
-  if (G1CollectedHeap::heap()->mark_in_progress() ||
-      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
-    _should_mark_objects = true;
-  }
-}
+  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
   : _g1h(g1h),
@@ -4323,8 +4174,7 @@
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0)
-{
+    _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
   // non-young regions (where the age is -1)
@@ -4429,35 +4279,53 @@
   } while (!refs()->is_empty());
 }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
+                                     G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-template <class T> void G1ParCopyHelper::mark_object(T* p) {
-  // This is called from do_oop_work for objects that are not
-  // in the collection set. Objects in the collection set
-  // are marked after they have been evacuated.
-
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop(heap_oop);
-    HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
-    }
-  }
-}
-
-oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
-                                                     bool should_mark_copy) {
+void G1ParCopyHelper::mark_object(oop obj) {
+#ifdef ASSERT
+  HeapRegion* hr = _g1->heap_region_containing(obj);
+  assert(hr != NULL, "sanity");
+  assert(!hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // We know that the object is not moving so it's safe to read its size.
+  _cm->grayRoot(obj, (size_t) obj->size());
+}
+
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+#ifdef ASSERT
+  assert(from_obj->is_forwarded(), "from obj should be forwarded");
+  assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
+  assert(from_obj != to_obj, "should not be self-forwarded");
+
+  HeapRegion* from_hr = _g1->heap_region_containing(from_obj);
+  assert(from_hr != NULL, "sanity");
+  assert(from_hr->in_collection_set(), "from obj should be in the CSet");
+
+  HeapRegion* to_hr = _g1->heap_region_containing(to_obj);
+  assert(to_hr != NULL, "sanity");
+  assert(!to_hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // The object might be in the process of being copied by another
+  // worker so we cannot trust that its to-space image is
+  // well-formed. So we have to read its size from its from-space
+  // image which we know should not be changing.
+  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+}
+
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index > 0) ||
-          (!from_region->is_young() && young_index == 0), "invariant" );
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
@@ -4471,7 +4339,7 @@
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
@@ -4507,28 +4375,14 @@
       obj->set_mark(m);
     }
 
-    // Mark the evacuated object or propagate "next" mark bit
-    if (should_mark_copy) {
-      if (!use_local_bitmaps ||
-          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
-        // if we couldn't mark it on the local bitmap (this happens when
-        // the object was not allocated in the GCLab), we have to bite
-        // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-
-      if (_g1->isMarkedNext(old)) {
-        // Unmark the object's old location so that marking
-        // doesn't think the old object is alive.
-        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
-      }
-    }
-
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
-      arrayOop(old)->set_length(0);
+      // We keep track of the next start index in the length field of
+      // the to-space object. The actual length can be found in the
+      // length field of the from-space object.
+      arrayOop(obj)->set_length(0);
       oop* old_p = set_partial_array_mask(old);
       _par_scan_state->push_on_queue(old_p);
     } else {
@@ -4550,61 +4404,24 @@
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
-         "Precondition: G1BarrierRS implies obj is nonNull");
-
-  // Marking:
-  // If the object is in the collection set, then the thread
-  // that copies the object should mark, or propagate the
-  // mark to, the evacuated object.
-  // If the object is not in the collection set then we
-  // should call the mark_object() method depending on the
-  // value of the template parameter do_mark_object (which will
-  // be true for root scanning closures during an initial mark
-  // pause).
-  // The mark_object() method first checks whether the object
-  // is marked and, if not, attempts to mark the object.
+         "Precondition: G1BarrierRS implies obj is non-NULL");
 
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
+    oop forwardee;
     if (obj->is_forwarded()) {
-      oopDesc::encode_store_heap_oop(p, obj->forwardee());
-      // If we are a root scanning closure during an initial
-      // mark pause (i.e. do_mark_object will be true) then
-      // we also need to handle marking of roots in the
-      // event of an evacuation failure. In the event of an
-      // evacuation failure, the object is forwarded to itself
-      // and not copied. For root-scanning closures, the
-      // object would be marked after a successful self-forward
-      // but an object could be pointed to by both a root and non
-      // root location and be self-forwarded by a non-root-scanning
-      // closure. Therefore we also have to attempt to mark the
-      // self-forwarded root object here.
-      if (do_mark_object && obj->forwardee() == obj) {
-        mark_object(p);
-      }
+      forwardee = obj->forwardee();
     } else {
-      // During an initial mark pause, objects that are pointed to
-      // by the roots need to be marked - even in the event of an
-      // evacuation failure. We pass the template parameter
-      // do_mark_object (which is true for root scanning closures
-      // during an initial mark pause) to copy_to_survivor_space
-      // which will pass it on to the evacuation failure handling
-      // code. The thread that successfully self-forwards a root
-      // object to itself is responsible for marking the object.
-      bool should_mark_root = do_mark_object;
-
-      // We need to mark the copied object if we're a root scanning
-      // closure during an initial mark pause (i.e. do_mark_object
-      // will be true), or the object is already marked and we need
-      // to propagate the mark to the evacuated copy.
-      bool should_mark_copy = do_mark_object ||
-                              _during_initial_mark ||
-                              (_mark_in_progress && !_g1->is_obj_ill(obj));
-
-      oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
-                                                 should_mark_copy);
-      oopDesc::encode_store_heap_oop(p, copy_oop);
+      forwardee = copy_to_survivor_space(obj);
     }
+    assert(forwardee != NULL, "forwardee should not be NULL");
+    oopDesc::encode_store_heap_oop(p, forwardee);
+    if (do_mark_object && forwardee != obj) {
+      // If the object is self-forwarded we don't need to explicitly
+      // mark it, the evacuation failure protocol will do so.
+      mark_forwarded_object(obj, forwardee);
+    }
+
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
@@ -4613,8 +4430,8 @@
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object) {
-      mark_object(p);
+    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+      mark_object(obj);
     }
   }
 
@@ -4632,35 +4449,51 @@
 
 template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
   assert(has_partial_array_mask(p), "invariant");
-  oop old = clear_partial_array_mask(p);
-  assert(old->is_objArray(), "must be obj array");
-  assert(old->is_forwarded(), "must be forwarded");
-  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-
-  objArrayOop obj = objArrayOop(old->forwardee());
-  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
-  // Process ParGCArrayScanChunk elements now
-  // and push the remainder back onto queue
-  int start     = arrayOop(old)->length();
-  int end       = obj->length();
-  int remainder = end - start;
-  assert(start <= end, "just checking");
+  oop from_obj = clear_partial_array_mask(p);
+
+  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+  assert(from_obj->is_objArray(), "must be obj array");
+  objArrayOop from_obj_array = objArrayOop(from_obj);
+  // The from-space object contains the real length.
+  int length                 = from_obj_array->length();
+
+  assert(from_obj->is_forwarded(), "must be forwarded");
+  oop to_obj                 = from_obj->forwardee();
+  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+  objArrayOop to_obj_array   = objArrayOop(to_obj);
+  // We keep track of the next start index in the length field of the
+  // to-space object.
+  int next_index             = to_obj_array->length();
+  assert(0 <= next_index && next_index < length,
+         err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+  int start                  = next_index;
+  int end                    = length;
+  int remainder              = end - start;
+  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
   if (remainder > 2 * ParGCArrayScanChunk) {
-    // Test above combines last partial chunk with a full chunk
     end = start + ParGCArrayScanChunk;
-    arrayOop(old)->set_length(end);
-    // Push remainder.
-    oop* old_p = set_partial_array_mask(old);
-    assert(arrayOop(old)->length() < obj->length(), "Empty push?");
-    _par_scan_state->push_on_queue(old_p);
+    to_obj_array->set_length(end);
+    // Push the remainder before we process the range in case another
+    // worker has run out of things to do and can steal it.
+    oop* from_obj_p = set_partial_array_mask(from_obj);
+    _par_scan_state->push_on_queue(from_obj_p);
   } else {
-    // Restore length so that the heap remains parsable in
-    // case of evacuation failure.
-    arrayOop(old)->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(obj));
-  // process our set of indices (include header in first chunk)
-  obj->oop_iterate_range(&_scanner, start, end);
+    assert(length == end, "sanity");
+    // We'll process the final range for this object. Restore the length
+    // so that the heap remains parsable in case of evacuation failure.
+    to_obj_array->set_length(end);
+  }
+  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
+  // Process indexes [start,end). It will also process the header
+  // along with the first chunk (i.e., the chunk with start == 0).
+  // Note that at this point the length field of to_obj_array is not
+  // correct given that we are using it to keep track of the next
+  // start index. oop_iterate_range() (thankfully!) ignores the length
+  // field and only relies on the start / end parameters.  It does
+  // however return the size of the object which will be incorrect. So
+  // we have to ignore it even if we wanted to use it.
+  to_obj_array->oop_iterate_range(&_scanner, start, end);
 }
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
@@ -4725,7 +4558,7 @@
   G1CollectedHeap*       _g1h;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
-  int _n_workers;
+  uint _n_workers;
 
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
@@ -4765,18 +4598,18 @@
     _n_workers = active_workers;
   }
 
-  void work(int i) {
-    if (i >= _n_workers) return;  // no work needed this round
+  void work(uint worker_id) {
+    if (worker_id >= _n_workers) return;  // no work needed this round
 
     double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_start_time(i, start_time_ms);
+    _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
 
     ResourceMark rm;
     HandleMark   hm;
 
     ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanThreadState            pss(_g1h, worker_id);
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
     G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
@@ -4808,7 +4641,7 @@
                                   scan_root_cl,
                                   &push_heap_rs_cl,
                                   scan_perm_cl,
-                                  i);
+                                  worker_id);
     pss.end_strong_roots();
 
     {
@@ -4817,8 +4650,8 @@
       evac.do_void();
       double elapsed_ms = (os::elapsedTime()-start)*1000.0;
       double term_ms = pss.term_time()*1000.0;
-      _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
-      _g1h->g1_policy()->record_termination(i, term_ms, pss.term_attempts());
+      _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
+      _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
     }
     _g1h->g1_policy()->record_thread_age_table(pss.age_table());
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4828,12 +4661,12 @@
 
     if (ParallelGCVerbose) {
       MutexLocker x(stats_lock());
-      pss.print_termination_stats(i);
+      pss.print_termination_stats(worker_id);
     }
 
     assert(pss.refs()->is_empty(), "should be empty");
     double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_end_time(i, end_time_ms);
+    _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
 };
 
@@ -4893,12 +4726,16 @@
 
   g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
+    if (mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5091,14 +4928,14 @@
     _terminator(terminator)
   {}
 
-  virtual void work(int i) {
+  virtual void work(uint worker_id) {
     // The reference processing task executed by a single worker.
     ResourceMark rm;
     HandleMark   hm;
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState pss(_g1h, i);
+    G1ParScanThreadState pss(_g1h, worker_id);
 
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
@@ -5130,7 +4967,7 @@
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
 
     // Call the reference processing task's work routine.
-    _proc_task.work(i, is_alive, keep_alive, drain_queue);
+    _proc_task.work(worker_id, is_alive, keep_alive, drain_queue);
 
     // Note we cannot assert that the refs array is empty here as not all
     // of the processing tasks (specifically phase2 - pp2_work) execute
@@ -5165,8 +5002,8 @@
     _enq_task(enq_task)
   { }
 
-  virtual void work(int i) {
-    _enq_task.work(i);
+  virtual void work(uint worker_id) {
+    _enq_task.work(worker_id);
   }
 };
 
@@ -5195,7 +5032,7 @@
   G1CollectedHeap* _g1h;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
-  int _n_workers;
+  uint _n_workers;
 
 public:
   G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h,int workers, RefToScanQueueSet *task_queues) :
@@ -5206,11 +5043,11 @@
     _n_workers(workers)
   { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanThreadState            pss(_g1h, worker_id);
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
     G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);
@@ -5246,17 +5083,17 @@
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
-    int limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q();
-    int stride = MIN2(MAX2(_n_workers, 1), limit);
+    uint limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q();
+    uint stride = MIN2(MAX2(_n_workers, 1U), limit);
 
     // limit is set using max_num_q() - which was set using ParallelGCThreads.
     // So this must be true - but assert just in case someone decides to
     // change the worker ids.
-    assert(0 <= i && i < limit, "sanity");
+    assert(0 <= worker_id && worker_id < limit, "sanity");
     assert(!rp->discovery_is_atomic(), "check this code");
 
     // Select discovered lists [i, i+stride, i+2*stride,...,limit)
-    for (int idx = i; idx < limit; idx += stride) {
+    for (uint idx = worker_id; idx < limit; idx += stride) {
       DiscoveredList& ref_list = rp->discovered_refs()[idx];
 
       DiscoveredListIterator iter(ref_list, &keep_alive, &always_alive);
@@ -5310,7 +5147,7 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+  uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                         workers()->active_workers() : 1);
 
   assert(!G1CollectedHeap::use_parallel_gc_threads() ||
@@ -5416,7 +5253,7 @@
   } else {
     // Parallel reference enqueuing
 
-    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
+    uint active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
     assert(active_workers == workers()->active_workers(),
            "Need to reset active_workers");
     assert(rp->num_q() == active_workers, "sanity");
@@ -5439,13 +5276,14 @@
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
+  _expand_heap_after_alloc_failure = true;
   set_evacuation_failed(false);
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();
 
-  int n_workers;
+  uint n_workers;
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     n_workers =
       AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
@@ -5516,13 +5354,6 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before clearing the per-region evac-failure flags
-  // (which is currently done when we free the collection set).
-  // We also only do this if marking is actually in progress and so
-  // have to do this before we set the mark_in_progress flag at the
-  // end of an initial mark pause.
-  concurrent_mark()->complete_marking_in_collection_set();
-
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
@@ -5658,7 +5489,7 @@
     AbstractGangTask("G1 Par Cleanup CT Task"),
     _ct_bs(ct_bs), _g1h(g1h) { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     HeapRegion* r;
     while (r = _g1h->pop_dirty_cards_region()) {
       clear_cards(r);
@@ -6141,7 +5972,7 @@
   // Don't change the number of workers.  Use the value previously set
   // in the workgroup.
   assert(G1CollectedHeap::use_parallel_gc_threads(), "shouldn't be here otherwise");
-  int n_workers = workers()->active_workers();
+  uint n_workers = workers()->active_workers();
   assert(UseDynamicNumberOfGCThreads ||
            n_workers == workers()->total_workers(),
       "Otherwise should be using the total number of workers");
@@ -6179,6 +6010,8 @@
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
       }
+      bool during_im = g1_policy()->during_initial_mark_pause();
+      new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
     } else {
       g1_policy()->note_alloc_region_limit_reached(ap);
@@ -6190,7 +6023,8 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              GCAllocPurpose ap) {
-  alloc_region->note_end_of_copying();
+  bool during_im = g1_policy()->during_initial_mark_pause();
+  alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +285,14 @@
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
 
+  // It specifies whether we should attempt to expand the heap after a
+  // region allocation failure. If heap expansion fails we set this to
+  // false so that we don't re-attempt the heap expansion (it's likely
+  // that subsequent expansion attempts will also fail if one fails).
+  // Currently, it is only consulted during GC and it's reset at the
+  // start of each GC.
+  bool _expand_heap_after_alloc_failure;
+
   // It resets the mutator alloc region before new allocations can take place.
   void init_mutator_alloc_region();
 
@@ -861,8 +869,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj,
-                                    bool should_mark_root);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
   // ("Weak") Reference processing support.
@@ -954,7 +961,7 @@
   unsigned int* _worker_cset_start_region_time_stamp;
 
   enum G1H_process_strong_roots_tasks {
-    G1H_PS_mark_stack_oops_do,
+    G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
     G1H_PS_NumElements
@@ -995,7 +1002,7 @@
   // Initialize weak reference processing.
   virtual void ref_processing_init();
 
-  void set_par_threads(int t) {
+  void set_par_threads(uint t) {
     SharedHeap::set_par_threads(t);
     // Done in SharedHeap but oddly there are
     // two _process_strong_tasks's in a G1CollectedHeap
@@ -1298,13 +1305,17 @@
   // chunk.)  For now requires that "doHeapRegion" always returns "false",
   // i.e., that a closure never attempt to abort a traversal.
   void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
-                                       int worker,
-                                       int no_of_par_workers,
+                                       uint worker,
+                                       uint no_of_par_workers,
                                        jint claim_value);
 
   // It resets all the region claim values to the default.
   void reset_heap_region_claim_values();
 
+  // Resets the claim values of regions in the current
+  // collection set to the default.
+  void reset_cset_heap_region_claim_values();
+
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
 
@@ -1740,10 +1751,8 @@
       _gclab_word_size(gclab_word_size),
       _real_start_word(NULL),
       _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
+      _start_word(NULL) {
+    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
   }
 
   inline unsigned heapWordToOffset(HeapWord* addr) {
@@ -1797,6 +1806,8 @@
   }
 
   void set_buffer(HeapWord* start) {
+    guarantee(false, "set_buffer(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     clear();
 
@@ -1820,6 +1831,8 @@
 #endif // PRODUCT
 
   void retire() {
+    guarantee(false, "retire(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     assert(fields_well_formed(), "invariant");
 
@@ -1853,32 +1866,18 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _should_mark_objects;
-  GCLabBitMap _bitmap;
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
 
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_should_mark_objects, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.set_buffer(buf);
-    }
+  void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  inline void retire(bool end_of_gc, bool retain) {
+  void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.retire();
-    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -136,7 +136,6 @@
   _stop_world_start(0.0),
   _all_stop_world_times_ms(new NumberSeq()),
   _all_yield_times_ms(new NumberSeq()),
-  _using_new_ratio_calculations(false),
 
   _summary(new Summary()),
 
@@ -230,7 +229,9 @@
   _inc_cset_bytes_used_before(0),
   _inc_cset_max_finger(NULL),
   _inc_cset_recorded_rs_lengths(0),
+  _inc_cset_recorded_rs_lengths_diffs(0),
   _inc_cset_predicted_elapsed_time_ms(0.0),
+  _inc_cset_predicted_elapsed_time_ms_diffs(0.0),
 
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
@@ -280,7 +281,7 @@
 
   _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
   _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
 
   _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
@@ -407,11 +408,7 @@
 
   initialize_all();
   _collectionSetChooser = new CollectionSetChooser();
-}
-
-// Increment "i", mod "len"
-static void inc_mod(int& i, int len) {
-  i++; if (i == len) i = 0;
+  _young_gen_sizer = new G1YoungGenSizer(); // Must be after call to initialize_flags
 }
 
 void G1CollectorPolicy::initialize_flags() {
@@ -423,39 +420,74 @@
   CollectorPolicy::initialize_flags();
 }
 
-// The easiest way to deal with the parsing of the NewSize /
-// MaxNewSize / etc. parameteres is to re-use the code in the
-// TwoGenerationCollectorPolicy class. This is similar to what
-// ParallelScavenge does with its GenerationSizer class (see
-// ParallelScavengeHeap::initialize()). We might change this in the
-// future, but it's a good start.
-class G1YoungGenSizer : public TwoGenerationCollectorPolicy {
-private:
-  size_t size_to_region_num(size_t byte_size) {
-    return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes);
+G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults), _adaptive_size(true) {
+  assert(G1DefaultMinNewGenPercent <= G1DefaultMaxNewGenPercent, "Min larger than max");
+  assert(G1DefaultMinNewGenPercent > 0 && G1DefaultMinNewGenPercent < 100, "Min out of bounds");
+  assert(G1DefaultMaxNewGenPercent > 0 && G1DefaultMaxNewGenPercent < 100, "Max out of bounds");
+
+  if (FLAG_IS_CMDLINE(NewRatio)) {
+    if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
+      warning("-XX:NewSize and -XX:MaxNewSize override -XX:NewRatio");
+    } else {
+      _sizer_kind = SizerNewRatio;
+      _adaptive_size = false;
+      return;
+    }
   }
 
-public:
-  G1YoungGenSizer() {
-    initialize_flags();
-    initialize_size_info();
+  if (FLAG_IS_CMDLINE(NewSize)) {
+     _min_desired_young_length = MAX2((size_t) 1, NewSize / HeapRegion::GrainBytes);
+    if (FLAG_IS_CMDLINE(MaxNewSize)) {
+      _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+      _sizer_kind = SizerMaxAndNewSize;
+      _adaptive_size = _min_desired_young_length == _max_desired_young_length;
+    } else {
+      _sizer_kind = SizerNewSizeOnly;
+    }
+  } else if (FLAG_IS_CMDLINE(MaxNewSize)) {
+    _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+    _sizer_kind = SizerMaxNewSizeOnly;
   }
-  size_t min_young_region_num() {
-    return size_to_region_num(_min_gen0_size);
-  }
-  size_t initial_young_region_num() {
-    return size_to_region_num(_initial_gen0_size);
+}
+
+size_t G1YoungGenSizer::calculate_default_min_length(size_t new_number_of_heap_regions) {
+  size_t default_value = (new_number_of_heap_regions * G1DefaultMinNewGenPercent) / 100;
+  return MAX2((size_t)1, default_value);
+}
+
+size_t G1YoungGenSizer::calculate_default_max_length(size_t new_number_of_heap_regions) {
+  size_t default_value = (new_number_of_heap_regions * G1DefaultMaxNewGenPercent) / 100;
+  return MAX2((size_t)1, default_value);
+}
+
+void G1YoungGenSizer::heap_size_changed(size_t new_number_of_heap_regions) {
+  assert(new_number_of_heap_regions > 0, "Heap must be initialized");
+
+  switch (_sizer_kind) {
+    case SizerDefaults:
+      _min_desired_young_length = calculate_default_min_length(new_number_of_heap_regions);
+      _max_desired_young_length = calculate_default_max_length(new_number_of_heap_regions);
+      break;
+    case SizerNewSizeOnly:
+      _max_desired_young_length = calculate_default_max_length(new_number_of_heap_regions);
+      _max_desired_young_length = MAX2(_min_desired_young_length, _max_desired_young_length);
+      break;
+    case SizerMaxNewSizeOnly:
+      _min_desired_young_length = calculate_default_min_length(new_number_of_heap_regions);
+      _min_desired_young_length = MIN2(_min_desired_young_length, _max_desired_young_length);
+      break;
+    case SizerMaxAndNewSize:
+      // Do nothing. Values set on the command line, don't update them at runtime.
+      break;
+    case SizerNewRatio:
+      _min_desired_young_length = new_number_of_heap_regions / (NewRatio + 1);
+      _max_desired_young_length = _min_desired_young_length;
+      break;
+    default:
+      ShouldNotReachHere();
   }
-  size_t max_young_region_num() {
-    return size_to_region_num(_max_gen0_size);
-  }
-};
-
-void G1CollectorPolicy::update_young_list_size_using_newratio(size_t number_of_heap_regions) {
-  assert(number_of_heap_regions > 0, "Heap must be initialized");
-  size_t young_size = number_of_heap_regions / (NewRatio + 1);
-  _min_desired_young_length = young_size;
-  _max_desired_young_length = young_size;
+
+  assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
 }
 
 void G1CollectorPolicy::init() {
@@ -466,28 +498,10 @@
 
   initialize_gc_policy_counters();
 
-  G1YoungGenSizer sizer;
-  _min_desired_young_length = sizer.min_young_region_num();
-  _max_desired_young_length = sizer.max_young_region_num();
-
-  if (FLAG_IS_CMDLINE(NewRatio)) {
-    if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
-      warning("-XX:NewSize and -XX:MaxNewSize override -XX:NewRatio");
-    } else {
-      // Treat NewRatio as a fixed size that is only recalculated when the heap size changes
-      update_young_list_size_using_newratio(_g1->n_regions());
-      _using_new_ratio_calculations = true;
-    }
-  }
-
-  assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
-
-  set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length);
   if (adaptive_young_list_length()) {
     _young_list_fixed_length = 0;
   } else {
-    assert(_min_desired_young_length == _max_desired_young_length, "Min and max young size differ");
-    _young_list_fixed_length = _min_desired_young_length;
+    _young_list_fixed_length = _young_gen_sizer->min_desired_young_length();
   }
   _free_regions_at_end_of_collection = _g1->free_regions();
   update_young_list_target_length();
@@ -541,11 +555,7 @@
   // smaller than 1.0) we'll get 1.
   _reserve_regions = (size_t) ceil(reserve_regions_d);
 
-  if (_using_new_ratio_calculations) {
-    // -XX:NewRatio was specified so we need to update the
-    // young gen length when the heap size has changed.
-    update_young_list_size_using_newratio(new_number_of_regions);
-  }
+  _young_gen_sizer->heap_size_changed(new_number_of_regions);
 }
 
 size_t G1CollectorPolicy::calculate_young_list_desired_min_length(
@@ -563,14 +573,14 @@
   }
   desired_min_length += base_min_length;
   // make sure we don't go below any user-defined minimum bound
-  return MAX2(_min_desired_young_length, desired_min_length);
+  return MAX2(_young_gen_sizer->min_desired_young_length(), desired_min_length);
 }
 
 size_t G1CollectorPolicy::calculate_young_list_desired_max_length() {
   // Here, we might want to also take into account any additional
   // constraints (i.e., user-defined minimum bound). Currently, we
   // effectively don't set this bound.
-  return _max_desired_young_length;
+  return _young_gen_sizer->max_desired_young_length();
 }
 
 void G1CollectorPolicy::update_young_list_target_length(size_t rs_lengths) {
@@ -895,10 +905,19 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  // We only need to do this here as the policy will only be applied
-  // to the GC we're about to start. so, no point is calculating this
-  // every time we calculate / recalculate the target young length.
-  update_survivors_policy();
+  if (!during_initial_mark_pause()) {
+    // We only need to do this here as the policy will only be applied
+    // to the GC we're about to start. so, no point is calculating this
+    // every time we calculate / recalculate the target young length.
+    update_survivors_policy();
+  } else {
+    // The marking phase has a "we only copy implicitly live
+    // objects during marking" invariant. The easiest way to ensure it
+    // holds is not to allocate any survivor regions and tenure all
+    // objects. In the future we might change this and handle survivor
+    // regions specially during marking.
+    tenure_all_objects();
+  }
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -929,7 +948,7 @@
   for (int i = 0; i < _parallel_gc_threads; ++i) {
     _par_last_gc_worker_start_times_ms[i] = -1234.0;
     _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_mark_stack_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
@@ -1217,7 +1236,7 @@
   // of the PrintGCDetails output, in the non-parallel case.
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   double update_rs_processed_buffers =
     sum_of_values(_par_last_update_rs_processed_buffers);
@@ -1226,7 +1245,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double known_time = ext_root_scan_time +
-                      mark_stack_scan_time +
+                      satb_filtering_time +
                       update_rs_time +
                       scan_rs_time +
                       obj_copy_time;
@@ -1272,7 +1291,7 @@
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
@@ -1366,16 +1385,12 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (print_marking_info) {
-      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       if (print_marking_info) {
-        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
       }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
@@ -1389,7 +1404,7 @@
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_satb_filtering_times_ms[i] +
                                    _par_last_update_rs_times_ms[i] +
                                    _par_last_scan_rs_times_ms[i] +
                                    _par_last_obj_copy_times_ms[i] +
@@ -1402,7 +1417,7 @@
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "SATB Filtering", satb_filtering_time);
       }
       print_stats(1, "Update RS", update_rs_time);
       print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
@@ -1551,10 +1566,19 @@
       }
     }
 
-    // It turns out that, sometimes, _max_rs_lengths can get smaller
-    // than _recorded_rs_lengths which causes rs_length_diff to get
-    // very large and mess up the RSet length predictions. We'll be
-    // defensive until we work out why this happens.
+    // This is defensive. For a while _max_rs_lengths could get
+    // smaller than _recorded_rs_lengths which was causing
+    // rs_length_diff to get very large and mess up the RSet length
+    // predictions. The reason was unsafe concurrent updates to the
+    // _inc_cset_recorded_rs_lengths field which the code below guards
+    // against (see CR 7118202). This bug has now been fixed (see CR
+    // 7119027). However, I'm still worried that
+    // _inc_cset_recorded_rs_lengths might still end up somewhat
+    // inaccurate. The concurrent refinement thread calculates an
+    // RSet's length concurrently with other CR threads updating it
+    // which might cause it to calculate the length incorrectly (if,
+    // say, it's in mid-coarsening). So I'll leave in the defensive
+    // conditional below just in case.
     size_t rs_length_diff = 0;
     if (_max_rs_lengths > _recorded_rs_lengths) {
       rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
@@ -1964,11 +1988,10 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
-      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(2, "Update RS", body_summary->get_update_rs_seq());
         print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
@@ -1977,7 +2000,7 @@
         {
           NumberSeq* other_parts[] = {
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq(),
@@ -1990,7 +2013,7 @@
         }
       } else {
         print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(1, "Update RS", body_summary->get_update_rs_seq());
         print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
@@ -2017,7 +2040,7 @@
             body_summary->get_satb_drain_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq()
           };
@@ -2321,17 +2344,19 @@
     _g1(G1CollectedHeap::heap())
   {}
 
-  void work(int i) {
-    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
+  void work(uint worker_id) {
+    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted,
+                                               _chunk_size,
+                                               worker_id);
     // Back to zero for the claim value.
-    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
+    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, worker_id,
                                          _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
     jint regions_added = parKnownGarbageCl.marked_regions_added();
     _hrSorted->incNumMarkedHeapRegions(regions_added);
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print_cr("     Thread %d called %d times, added %d regions to list.",
-                 i, parKnownGarbageCl.invokes(), regions_added);
+                 worker_id, parKnownGarbageCl.invokes(), regions_added);
     }
   }
 };
@@ -2412,9 +2437,6 @@
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->registerCSetRegion(hr);
-
   assert(!hr->in_collection_set(), "should not already be in the CSet");
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
@@ -2436,10 +2458,45 @@
 
   _inc_cset_max_finger = 0;
   _inc_cset_recorded_rs_lengths = 0;
-  _inc_cset_predicted_elapsed_time_ms = 0;
+  _inc_cset_recorded_rs_lengths_diffs = 0;
+  _inc_cset_predicted_elapsed_time_ms = 0.0;
+  _inc_cset_predicted_elapsed_time_ms_diffs = 0.0;
   _inc_cset_build_state = Active;
 }
 
+void G1CollectorPolicy::finalize_incremental_cset_building() {
+  assert(_inc_cset_build_state == Active, "Precondition");
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+
+  // The two "main" fields, _inc_cset_recorded_rs_lengths and
+  // _inc_cset_predicted_elapsed_time_ms, are updated by the thread
+  // that adds a new region to the CSet. Further updates by the
+  // concurrent refinement thread that samples the young RSet lengths
+  // are accumulated in the *_diffs fields. Here we add the diffs to
+  // the "main" fields.
+
+  if (_inc_cset_recorded_rs_lengths_diffs >= 0) {
+    _inc_cset_recorded_rs_lengths += _inc_cset_recorded_rs_lengths_diffs;
+  } else {
+    // This is defensive. The diff should in theory be always positive
+    // as RSets can only grow between GCs. However, given that we
+    // sample their size concurrently with other threads updating them
+    // it's possible that we might get the wrong size back, which
+    // could make the calculations somewhat inaccurate.
+    size_t diffs = (size_t) (-_inc_cset_recorded_rs_lengths_diffs);
+    if (_inc_cset_recorded_rs_lengths >= diffs) {
+      _inc_cset_recorded_rs_lengths -= diffs;
+    } else {
+      _inc_cset_recorded_rs_lengths = 0;
+    }
+  }
+  _inc_cset_predicted_elapsed_time_ms +=
+                                     _inc_cset_predicted_elapsed_time_ms_diffs;
+
+  _inc_cset_recorded_rs_lengths_diffs = 0;
+  _inc_cset_predicted_elapsed_time_ms_diffs = 0.0;
+}
+
 void G1CollectorPolicy::add_to_incremental_cset_info(HeapRegion* hr, size_t rs_length) {
   // This routine is used when:
   // * adding survivor regions to the incremental cset at the end of an
@@ -2455,10 +2512,8 @@
 
   double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
   size_t used_bytes = hr->used();
-
   _inc_cset_recorded_rs_lengths += rs_length;
   _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms;
-
   _inc_cset_bytes_used_before += used_bytes;
 
   // Cache the values we have added to the aggregated informtion
@@ -2469,37 +2524,33 @@
   hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms);
 }
 
-void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) {
-  // This routine is currently only called as part of the updating of
-  // existing policy information for regions in the incremental cset that
-  // is performed by the concurrent refine thread(s) as part of young list
-  // RSet sampling. Therefore we should not be at a safepoint.
-
-  assert(!SafepointSynchronize::is_at_safepoint(), "should not be at safepoint");
-  assert(hr->is_young(), "it should be");
-
-  size_t used_bytes = hr->used();
-  size_t old_rs_length = hr->recorded_rs_length();
+void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr,
+                                                     size_t new_rs_length) {
+  // Update the CSet information that is dependent on the new RS length
+  assert(hr->is_young(), "Precondition");
+  assert(!SafepointSynchronize::is_at_safepoint(),
+                                               "should not be at a safepoint");
+
+  // We could have updated _inc_cset_recorded_rs_lengths and
+  // _inc_cset_predicted_elapsed_time_ms directly but we'd need to do
+  // that atomically, as this code is executed by a concurrent
+  // refinement thread, potentially concurrently with a mutator thread
+  // allocating a new region and also updating the same fields. To
+  // avoid the atomic operations we accumulate these updates on two
+  // separate fields (*_diffs) and we'll just add them to the "main"
+  // fields at the start of a GC.
+
+  ssize_t old_rs_length = (ssize_t) hr->recorded_rs_length();
+  ssize_t rs_lengths_diff = (ssize_t) new_rs_length - old_rs_length;
+  _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff;
+
   double old_elapsed_time_ms = hr->predicted_elapsed_time_ms();
-
-  // Subtract the old recorded/predicted policy information for
-  // the given heap region from the collection set info.
-  _inc_cset_recorded_rs_lengths -= old_rs_length;
-  _inc_cset_predicted_elapsed_time_ms -= old_elapsed_time_ms;
-
-  _inc_cset_bytes_used_before -= used_bytes;
-
-  // Clear the values cached in the heap region
-  hr->set_recorded_rs_length(0);
-  hr->set_predicted_elapsed_time_ms(0);
-}
-
-void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) {
-  // Update the collection set information that is dependent on the new RS length
-  assert(hr->is_young(), "Precondition");
-
-  remove_from_incremental_cset_info(hr);
-  add_to_incremental_cset_info(hr, new_rs_length);
+  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms;
+  _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff;
+
+  hr->set_recorded_rs_length(new_rs_length);
+  hr->set_predicted_elapsed_time_ms(new_region_elapsed_time_ms);
 }
 
 void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) {
@@ -2591,6 +2642,7 @@
   double non_young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
+  finalize_incremental_cset_building();
 
   guarantee(target_pause_time_ms > 0.0,
             err_msg("target_pause_time_ms = %1.6lf should be positive",
@@ -2654,9 +2706,6 @@
   // Clear the fields that point to the survivor list - they are all young now.
   young_list->clear_survivors();
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
-
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,7 +67,7 @@
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
-    define_num_seq(mark_stack_scan)
+    define_num_seq(satb_filtering)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
     define_num_seq(obj_copy)
@@ -83,6 +83,72 @@
   virtual MainBodySummary*    main_body_summary()    { return this; }
 };
 
+// There are three command line options related to the young gen size:
+// NewSize, MaxNewSize and NewRatio (There is also -Xmn, but that is
+// just a short form for NewSize==MaxNewSize). G1 will use its internal
+// heuristics to calculate the actual young gen size, so these options
+// basically only limit the range within which G1 can pick a young gen
+// size. Also, these are general options taking byte sizes. G1 will
+// internally work with a number of regions instead. So, some rounding
+// will occur.
+//
+// If nothing related to the the young gen size is set on the command
+// line we should allow the young gen to be between
+// G1DefaultMinNewGenPercent and G1DefaultMaxNewGenPercent of the
+// heap size. This means that every time the heap size changes the
+// limits for the young gen size will be updated.
+//
+// If only -XX:NewSize is set we should use the specified value as the
+// minimum size for young gen. Still using G1DefaultMaxNewGenPercent
+// of the heap as maximum.
+//
+// If only -XX:MaxNewSize is set we should use the specified value as the
+// maximum size for young gen. Still using G1DefaultMinNewGenPercent
+// of the heap as minimum.
+//
+// If -XX:NewSize and -XX:MaxNewSize are both specified we use these values.
+// No updates when the heap size changes. There is a special case when
+// NewSize==MaxNewSize. This is interpreted as "fixed" and will use a
+// different heuristic for calculating the collection set when we do mixed
+// collection.
+//
+// If only -XX:NewRatio is set we should use the specified ratio of the heap
+// as both min and max. This will be interpreted as "fixed" just like the
+// NewSize==MaxNewSize case above. But we will update the min and max
+// everytime the heap size changes.
+//
+// NewSize and MaxNewSize override NewRatio. So, NewRatio is ignored if it is
+// combined with either NewSize or MaxNewSize. (A warning message is printed.)
+class G1YoungGenSizer : public CHeapObj {
+private:
+  enum SizerKind {
+    SizerDefaults,
+    SizerNewSizeOnly,
+    SizerMaxNewSizeOnly,
+    SizerMaxAndNewSize,
+    SizerNewRatio
+  };
+  SizerKind _sizer_kind;
+  size_t _min_desired_young_length;
+  size_t _max_desired_young_length;
+  bool _adaptive_size;
+  size_t calculate_default_min_length(size_t new_number_of_heap_regions);
+  size_t calculate_default_max_length(size_t new_number_of_heap_regions);
+
+public:
+  G1YoungGenSizer();
+  void heap_size_changed(size_t new_number_of_heap_regions);
+  size_t min_desired_young_length() {
+    return _min_desired_young_length;
+  }
+  size_t max_desired_young_length() {
+    return _max_desired_young_length;
+  }
+  bool adaptive_young_list_length() {
+    return _adaptive_size;
+  }
+};
+
 class G1CollectorPolicy: public CollectorPolicy {
 private:
   // either equal to the number of parallel threads, if ParallelGCThreads
@@ -149,7 +215,7 @@
 
   double* _par_last_gc_worker_start_times_ms;
   double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
@@ -167,9 +233,6 @@
   // indicates whether we are in young or mixed GC mode
   bool _gcs_are_young;
 
-  // if true, then it tries to dynamically adjust the length of the
-  // young list
-  bool _adaptive_young_list_length;
   size_t _young_list_target_length;
   size_t _young_list_fixed_length;
   size_t _prev_eden_capacity; // used for logging
@@ -227,9 +290,7 @@
 
   TruncatedSeq* _young_gc_eff_seq;
 
-  bool   _using_new_ratio_calculations;
-  size_t _min_desired_young_length; // as set on the command line or default calculations
-  size_t _max_desired_young_length; // as set on the command line or default calculations
+  G1YoungGenSizer* _young_gen_sizer;
 
   size_t _eden_cset_region_length;
   size_t _survivor_cset_region_length;
@@ -588,16 +649,29 @@
   // Used to record the highest end of heap region in collection set
   HeapWord* _inc_cset_max_finger;
 
-  // The RSet lengths recorded for regions in the collection set
-  // (updated by the periodic sampling of the regions in the
-  // young list/collection set).
+  // The RSet lengths recorded for regions in the CSet. It is updated
+  // by the thread that adds a new region to the CSet. We assume that
+  // only one thread can be allocating a new CSet region (currently,
+  // it does so after taking the Heap_lock) hence no need to
+  // synchronize updates to this field.
   size_t _inc_cset_recorded_rs_lengths;
 
-  // The predicted elapsed time it will take to collect the regions
-  // in the collection set (updated by the periodic sampling of the
-  // regions in the young list/collection set).
+  // A concurrent refinement thread periodcially samples the young
+  // region RSets and needs to update _inc_cset_recorded_rs_lengths as
+  // the RSets grow. Instead of having to syncronize updates to that
+  // field we accumulate them in this field and add it to
+  // _inc_cset_recorded_rs_lengths_diffs at the start of a GC.
+  ssize_t _inc_cset_recorded_rs_lengths_diffs;
+
+  // The predicted elapsed time it will take to collect the regions in
+  // the CSet. This is updated by the thread that adds a new region to
+  // the CSet. See the comment for _inc_cset_recorded_rs_lengths about
+  // MT-safety assumptions.
   double _inc_cset_predicted_elapsed_time_ms;
 
+  // See the comment for _inc_cset_recorded_rs_lengths_diffs.
+  double _inc_cset_predicted_elapsed_time_ms_diffs;
+
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
@@ -682,8 +756,6 @@
   // Count the number of bytes used in the CS.
   void count_CS_bytes_used();
 
-  void update_young_list_size_using_newratio(size_t number_of_heap_regions);
-
 public:
 
   G1CollectorPolicy();
@@ -710,8 +782,6 @@
   // This should be called after the heap is resized.
   void record_new_heap_size(size_t new_number_of_regions);
 
-public:
-
   void init();
 
   // Create jstat counters for the policy.
@@ -771,8 +841,8 @@
     _par_last_ext_root_scan_times_ms[worker_i] = ms;
   }
 
-  void record_mark_stack_scan_time(int worker_i, double ms) {
-    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  void record_satb_filtering_time(int worker_i, double ms) {
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
   }
 
   void record_satb_drain_time(double ms) {
@@ -894,6 +964,10 @@
   // Initialize incremental collection set info.
   void start_incremental_cset_building();
 
+  // Perform any final calculations on the incremental CSet fields
+  // before we can use them.
+  void finalize_incremental_cset_building();
+
   void clear_incremental_cset() {
     _inc_cset_head = NULL;
     _inc_cset_tail = NULL;
@@ -902,10 +976,9 @@
   // Stop adding regions to the incremental collection set
   void stop_incremental_cset_building() { _inc_cset_build_state = Inactive; }
 
-  // Add/remove information about hr to the aggregated information
-  // for the incrementally built collection set.
+  // Add information about hr to the aggregated information for the
+  // incrementally built collection set.
   void add_to_incremental_cset_info(HeapRegion* hr, size_t rs_length);
-  void remove_from_incremental_cset_info(HeapRegion* hr);
 
   // Update information about hr in the aggregated information for
   // the incrementally built collection set.
@@ -998,10 +1071,7 @@
   }
 
   bool adaptive_young_list_length() {
-    return _adaptive_young_list_length;
-  }
-  void set_adaptive_young_list_length(bool adaptive_young_list_length) {
-    _adaptive_young_list_length = adaptive_young_list_length;
+    return _young_gen_sizer->adaptive_young_list_length();
   }
 
   inline double get_gc_eff_factor() {
@@ -1076,6 +1146,11 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
+  void tenure_all_objects() {
+    _max_survivor_regions = 0;
+    _tenuring_threshold = 0;
+  }
+
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+
+#include "gc_implementation/g1/concurrentMark.inline.hpp"
+#include "gc_implementation/g1/dirtyCardQueue.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1_globals.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "utilities/workgroup.hpp"
+
+// Closures and tasks associated with any self-forwarding pointers
+// installed as a result of an evacuation failure.
+
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
+        !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _marked_bytes;
+  OopsInHeapRegionClosure *_update_rset_cl;
+  bool _during_initial_mark;
+  bool _during_conc_mark;
+public:
+  RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                                 HeapRegion* hr,
+                                 OopsInHeapRegionClosure* update_rset_cl,
+                                 bool during_initial_mark,
+                                 bool during_conc_mark) :
+    _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
+    _update_rset_cl(update_rset_cl),
+    _during_initial_mark(during_initial_mark),
+    _during_conc_mark(during_conc_mark) { }
+
+  size_t marked_bytes() { return _marked_bytes; }
+
+  // <original comment>
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  // </original comment>
+  //
+  // We now reset the BOT when we start the object iteration over the
+  // region and refine its entries for every object we come across. So
+  // the above comment is not really relevant and we should be able
+  // to coalesce dead objects if we want to.
+  void do_object(oop obj) {
+    HeapWord* obj_addr = (HeapWord*) obj;
+    assert(_hr->is_in(obj_addr), "sanity");
+    size_t obj_size = obj->size();
+    _hr->update_bot_for_object(obj_addr, obj_size);
+
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+
+      // We consider all objects that we find self-forwarded to be
+      // live. What we'll do is that we'll update the prev marking
+      // info so that they are all under PTAMS and explicitly marked.
+      _cm->markPrev(obj);
+      if (_during_initial_mark) {
+        // For the next marking info we'll only mark the
+        // self-forwarded objects explicitly if we are during
+        // initial-mark (since, normally, we only mark objects pointed
+        // to by roots if we succeed in copying them). By marking all
+        // self-forwarded objects we ensure that we mark any that are
+        // still pointed to be roots. During concurrent marking, and
+        // after initial-mark, we don't need to mark any objects
+        // explicitly and all objects in the CSet are considered
+        // (implicitly) live. So, we won't mark them explicitly and
+        // we'll leave them over NTAMS.
+        _cm->markNext(obj);
+      }
+      _marked_bytes += (obj_size * HeapWordSize);
+      obj->set_mark(markOopDesc::prototype());
+
+      // While we were processing RSet buffers during the collection,
+      // we actually didn't scan any cards on the collection set,
+      // since we didn't want to update remembered sets with entries
+      // that point into the collection set, given that live objects
+      // from the collection set are about to move and such entries
+      // will be stale very soon.
+      // This change also dealt with a reliability issue which
+      // involved scanning a card in the collection set and coming
+      // across an array that was being chunked and looking malformed.
+      // The problem is that, if evacuation fails, we might have
+      // remembered set entries missing given that we skipped cards on
+      // the collection set. So, we'll recreate such entries now.
+      obj->oop_iterate(_update_rset_cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*) obj, obj_size);
+      CollectedHeap::fill_with_object(mr);
+    }
+  }
+};
+
+class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure *_update_rset_cl;
+
+public:
+  RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
+                                OopsInHeapRegionClosure* update_rset_cl) :
+    _g1h(g1h), _update_rset_cl(update_rset_cl),
+    _cm(_g1h->concurrent_mark()) { }
+
+  bool doHeapRegion(HeapRegion *hr) {
+    bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
+    bool during_conc_mark = _g1h->mark_in_progress();
+
+    assert(!hr->isHumongous(), "sanity");
+    assert(hr->in_collection_set(), "bad CS");
+
+    if (hr->claimHeapRegion(HeapRegion::ParEvacFailureClaimValue)) {
+      if (hr->evacuation_failed()) {
+        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
+                                            during_initial_mark,
+                                            during_conc_mark);
+
+        MemRegion mr(hr->bottom(), hr->end());
+        // We'll recreate the prev marking info so we'll first clear
+        // the prev bitmap range for this region. We never mark any
+        // CSet objects explicitly so the next bitmap range should be
+        // cleared anyway.
+        _cm->clearRangePrevBitmap(mr);
+
+        hr->note_self_forwarding_removal_start(during_initial_mark,
+                                               during_conc_mark);
+
+        // In the common case (i.e. when there is no evacuation
+        // failure) we make sure that the following is done when
+        // the region is freed so that it is "ready-to-go" when it's
+        // re-allocated. However, when evacuation failure happens, a
+        // region will remain in the heap and might ultimately be added
+        // to a CSet in the future. So we have to be careful here and
+        // make sure the region's RSet is ready for parallel iteration
+        // whenever this might be required in the future.
+        hr->rem_set()->reset_for_par_iteration();
+        hr->reset_bot();
+        _update_rset_cl->set_region(hr);
+        hr->object_iterate(&rspc);
+
+        hr->note_self_forwarding_removal_end(during_initial_mark,
+                                             during_conc_mark,
+                                             rspc.marked_bytes());
+      }
+    }
+    return false;
+  }
+};
+
+class G1ParRemoveSelfForwardPtrsTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+
+public:
+  G1ParRemoveSelfForwardPtrsTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Remove Self-forwarding Pointers"),
+    _g1h(g1h) { }
+
+  void work(uint worker_id) {
+    UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
+    DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+    UpdateRSetDeferred deferred_update(_g1h, &dcq);
+
+    OopsInHeapRegionClosure *update_rset_cl = &deferred_update;
+    if (!G1DeferredRSUpdate) {
+      update_rset_cl = &immediate_update;
+    }
+
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl);
+
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
+    _g1h->collection_set_iterate_from(hr, &rsfp_cl);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,17 +121,25 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_object(T* p);
-  oop copy_to_survivor_space(oop obj, bool should_mark_root,
-                                      bool should_mark_copy);
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that are guaranteed not to move
+  // during the GC (i.e., non-CSet objects). It is MT-safe.
+  void mark_object(oop obj);
+
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that have been forwarded during a
+  // GC. It is MT-safe.
+  void mark_forwarded_object(oop from_obj, oop to_obj);
+
+  oop copy_to_survivor_space(oop obj);
+
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
 
@@ -140,9 +148,8 @@
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-    _scanner(g1, par_scan_state, rp),
-    G1ParCopyHelper(g1, par_scan_state, &_scanner)
-  {
+      _scanner(g1, par_scan_state, rp),
+      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
     assert(_ref_processor == NULL, "sanity");
   }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -558,11 +558,11 @@
 }
 
 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
-                                int worker_num, int claim_val) {
+                                uint worker_num, int claim_val) {
   ScrubRSClosure scrub_cl(region_bm, card_bm);
   _g1->heap_region_par_iterate_chunked(&scrub_cl,
                                        worker_num,
-                                       (int) n_workers(),
+                                       n_workers(),
                                        claim_val);
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -40,7 +40,7 @@
 protected:
   G1CollectedHeap* _g1;
   unsigned _conc_refine_cards;
-  size_t n_workers();
+  uint n_workers();
 
 protected:
   enum SomePrivateConstants {
@@ -122,7 +122,7 @@
   // parallel thread id of the current thread, and "claim_val" is the
   // value that should be used to claim heap regions.
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
-                 int worker_num, int claim_val);
+                 uint worker_num, int claim_val);
 
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -29,7 +29,7 @@
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "oops/oop.inline.hpp"
 
-inline size_t G1RemSet::n_workers() {
+inline uint G1RemSet::n_workers() {
   if (_g1->workers() != NULL) {
     return _g1->workers()->total_workers();
   } else {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -289,7 +289,15 @@
                                                                             \
   develop(uintx, G1ConcMarkForceOverflow, 0,                                \
           "The number of times we'll force an overflow during "             \
-          "concurrent marking")
+          "concurrent marking")                                             \
+                                                                            \
+  develop(uintx, G1DefaultMinNewGenPercent, 20,                             \
+          "Percentage (0-100) of the heap size to use as minimum "          \
+          "young gen size.")                                                \
+                                                                            \
+  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
+          "Percentage (0-100) of the heap size to use as maximum "          \
+          "young gen size.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -94,7 +94,8 @@
 #endif // PRODUCT
   }
 
-  template <class T> void do_oop_work(T* p) {
+  template <class T>
+  void do_oop_work(T* p) {
     assert(_containing_obj != NULL, "Precondition");
     assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
            "Precondition");
@@ -102,8 +103,10 @@
     if (!oopDesc::is_null(heap_oop)) {
       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       bool failed = false;
-      if (!_g1h->is_in_closed_subset(obj) ||
-          _g1h->is_obj_dead_cond(obj, _vo)) {
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
+        MutexLockerEx x(ParGCRareEvent_lock,
+                        Mutex::_no_safepoint_check_flag);
+
         if (!_failures) {
           gclog_or_tty->print_cr("");
           gclog_or_tty->print_cr("----------");
@@ -133,6 +136,7 @@
           print_object(gclog_or_tty, obj);
         }
         gclog_or_tty->print_cr("----------");
+        gclog_or_tty->flush();
         _failures = true;
         failed = true;
         _n_failures++;
@@ -155,6 +159,9 @@
                                   cv_field == dirty
                                : cv_obj == dirty || cv_field == dirty));
           if (is_bad) {
+            MutexLockerEx x(ParGCRareEvent_lock,
+                            Mutex::_no_safepoint_check_flag);
+
             if (!_failures) {
               gclog_or_tty->print_cr("");
               gclog_or_tty->print_cr("----------");
@@ -174,6 +181,7 @@
             gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
                           cv_obj, cv_field);
             gclog_or_tty->print_cr("----------");
+            gclog_or_tty->flush();
             _failures = true;
             if (!failed) _n_failures++;
           }
@@ -567,6 +575,40 @@
   oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
 }
 
+void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
+                                                    bool during_conc_mark) {
+  // We always recreate the prev marking info and we'll explicitly
+  // mark all objects we find to be self-forwarded on the prev
+  // bitmap. So all objects need to be below PTAMS.
+  _prev_top_at_mark_start = top();
+  _prev_marked_bytes = 0;
+
+  if (during_initial_mark) {
+    // During initial-mark, we'll also explicitly mark all objects
+    // we find to be self-forwarded on the next bitmap. So all
+    // objects need to be below NTAMS.
+    _next_top_at_mark_start = top();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  } else if (during_conc_mark) {
+    // During concurrent mark, all objects in the CSet (including
+    // the ones we find to be self-forwarded) are implicitly live.
+    // So all objects need to be above NTAMS.
+    _next_top_at_mark_start = bottom();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  }
+}
+
+void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
+                                                  bool during_conc_mark,
+                                                  size_t marked_bytes) {
+  assert(0 <= marked_bytes && marked_bytes <= used(),
+         err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
+                 marked_bytes, used()));
+  _prev_marked_bytes = marked_bytes;
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                                  ObjectClosure* cl) {
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,7 +373,8 @@
     ScrubRemSetClaimValue      = 3,
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
-    CompleteMarkCSetClaimValue = 6
+    CompleteMarkCSetClaimValue = 6,
+    ParEvacFailureClaimValue   = 7
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -582,37 +583,33 @@
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
 
-  // Note the start of a marking phase. Record the
-  // start of the unmarked area of the region here.
-  void note_start_of_marking(bool during_initial_mark) {
-    init_top_at_conc_mark_count();
-    _next_marked_bytes = 0;
-    if (during_initial_mark && is_young() && !is_survivor())
-      _next_top_at_mark_start = bottom();
-    else
-      _next_top_at_mark_start = top();
-  }
+  // Notify the region that concurrent marking is starting. Initialize
+  // all fields related to the next marking info.
+  inline void note_start_of_marking();
+
+  // Notify the region that concurrent marking has finished. Copy the
+  // (now finalized) next marking info fields into the prev marking
+  // info fields.
+  inline void note_end_of_marking();
+
+  // Notify the region that it will be used as to-space during a GC
+  // and we are about to start copying objects into it.
+  inline void note_start_of_copying(bool during_initial_mark);
 
-  // Note the end of a marking phase. Install the start of
-  // the unmarked area that was captured at start of marking.
-  void note_end_of_marking() {
-    _prev_top_at_mark_start = _next_top_at_mark_start;
-    _prev_marked_bytes = _next_marked_bytes;
-    _next_marked_bytes = 0;
+  // Notify the region that it ceases being to-space during a GC and
+  // we will not copy objects into it any more.
+  inline void note_end_of_copying(bool during_initial_mark);
 
-    guarantee(_prev_marked_bytes <=
-              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
-              "invariant");
-  }
+  // Notify the region that we are about to start processing
+  // self-forwarded objects during evac failure handling.
+  void note_self_forwarding_removal_start(bool during_initial_mark,
+                                          bool during_conc_mark);
 
-  // After an evacuation, we need to update _next_top_at_mark_start
-  // to be the current top.  Note this is only valid if we have only
-  // ever evacuated into this region.  If we evacuate, allocate, and
-  // then evacuate we are in deep doodoo.
-  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start, "Increase only");
-    _next_top_at_mark_start = top();
-  }
+  // Notify the region that we have finished processing self-forwarded
+  // objects during evac failure handling.
+  void note_self_forwarding_removal_end(bool during_initial_mark,
+                                        bool during_conc_mark,
+                                        size_t marked_bytes);
 
   // Returns "false" iff no object in the region was allocated when the
   // last mark phase ended.
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,4 +55,71 @@
   return _offsets.block_start_const(p);
 }
 
+inline void HeapRegion::note_start_of_marking() {
+  init_top_at_conc_mark_count();
+  _next_marked_bytes = 0;
+  _next_top_at_mark_start = top();
+}
+
+inline void HeapRegion::note_end_of_marking() {
+  _prev_top_at_mark_start = _next_top_at_mark_start;
+  _prev_marked_bytes = _next_marked_bytes;
+  _next_marked_bytes = 0;
+
+  assert(_prev_marked_bytes <=
+         (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) *
+         HeapWordSize, "invariant");
+}
+
+inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // During initial-mark we'll explicitly mark any objects on old
+      // regions that are pointed to by roots. Given that explicit
+      // marks only make sense under NTAMS it'd be nice if we could
+      // check that condition if we wanted to. Given that we don't
+      // know where the top of this region will end up, we simply set
+      // NTAMS to the end of the region so all marks will be below
+      // NTAMS. We'll set it to the actual top when we retire this region.
+      _next_top_at_mark_start = end();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // We could have re-used this old region as to-space over a
+      // couple of GCs since the start of the concurrent marking
+      // cycle. This means that [bottom,NTAMS) will contain objects
+      // copied up to and including initial-mark and [NTAMS, top)
+      // will contain objects copied during the concurrent marking cycle.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
+inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(_next_top_at_mark_start == end(), "pre-condition");
+      _next_top_at_mark_start = top();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
   // given PtrQueueSet.
   PtrQueue(PtrQueueSet* qset, bool perm = false, bool active = false);
   // Release any contained resources.
-  void flush();
+  virtual void flush();
   // Calls flush() when destroyed.
   ~PtrQueue() { flush(); }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,14 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
+void ObjPtrQueue::flush() {
+  // The buffer might contain refs into the CSet. We have to filter it
+  // first before we flush it, otherwise we might end up with an
+  // enqueued buffer with refs into the CSet which breaks our invariants.
+  filter();
+  PtrQueue::flush();
+}
+
 // This method removes entries from an SATB buffer that will not be
 // useful to the concurrent marking threads. An entry is removed if it
 // satisfies one of the following conditions:
@@ -44,38 +52,27 @@
 //     process it again).
 //
 // The rest of the entries will be retained and are compacted towards
-// the top of the buffer. If with this filtering we clear a large
-// enough chunk of the buffer we can re-use it (instead of enqueueing
-// it) and we can just allow the mutator to carry on executing.
-
-bool ObjPtrQueue::should_enqueue_buffer() {
-  assert(_lock == NULL || _lock->owned_by_self(),
-         "we should have taken the lock before calling this");
+// the top of the buffer. Note that, because we do not allow old
+// regions in the CSet during marking, all objects on the CSet regions
+// are young (eden or survivors) and therefore implicitly live. So any
+// references into the CSet will be removed during filtering.
 
-  // A value of 0 means "don't filter SATB buffers".
-  if (G1SATBBufferEnqueueingThresholdPercent == 0) {
-    return true;
-  }
-
+void ObjPtrQueue::filter() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // This method should only be called if there is a non-NULL buffer
-  // that is full.
-  assert(_index == 0, "pre-condition");
-  assert(_buf != NULL, "pre-condition");
-
   void** buf = _buf;
   size_t sz = _sz;
 
+  if (buf == NULL) {
+    // nothing to do
+    return;
+  }
+
   // Used for sanity checking at the end of the loop.
   debug_only(size_t entries = 0; size_t retained = 0;)
 
   size_t i = sz;
   size_t new_index = sz;
 
-  // Given that we are expecting _index == 0, we could have changed
-  // the loop condition to (i > 0). But we are using _index for
-  // generality.
   while (i > _index) {
     assert(i > 0, "we should have at least one more entry to process");
     i -= oopSize;
@@ -103,22 +100,58 @@
       debug_only(retained += 1;)
     }
   }
+
+#ifdef ASSERT
   size_t entries_calc = (sz - _index) / oopSize;
   assert(entries == entries_calc, "the number of entries we counted "
          "should match the number of entries we calculated");
   size_t retained_calc = (sz - new_index) / oopSize;
   assert(retained == retained_calc, "the number of retained entries we counted "
          "should match the number of retained entries we calculated");
-  size_t perc = retained_calc * 100 / entries_calc;
+#endif // ASSERT
+
+  _index = new_index;
+}
+
+// This method will first apply the above filtering to the buffer. If
+// post-filtering a large enough chunk of the buffer has been cleared
+// we can re-use the buffer (instead of enqueueing it) and we can just
+// allow the mutator to carry on executing using the same buffer
+// instead of replacing it.
+
+bool ObjPtrQueue::should_enqueue_buffer() {
+  assert(_lock == NULL || _lock->owned_by_self(),
+         "we should have taken the lock before calling this");
+
+  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
+  // filter the buffer given that this will remove any references into
+  // the CSet as we currently assume that no such refs will appear in
+  // enqueued buffers.
+
+  // This method should only be called if there is a non-NULL buffer
+  // that is full.
+  assert(_index == 0, "pre-condition");
+  assert(_buf != NULL, "pre-condition");
+
+  filter();
+
+  size_t sz = _sz;
+  size_t all_entries = sz / oopSize;
+  size_t retained_entries = (sz - _index) / oopSize;
+  size_t perc = retained_entries * 100 / all_entries;
   bool should_enqueue = perc > (size_t) G1SATBBufferEnqueueingThresholdPercent;
-  _index = new_index;
-
   return should_enqueue;
 }
 
 void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
   if (_buf != NULL) {
     apply_closure_to_buffer(cl, _buf, _index, _sz);
+  }
+}
+
+void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
     _index = _sz;
   }
 }
@@ -135,6 +168,21 @@
   }
 }
 
+#ifndef PRODUCT
+// Helpful for debugging
+
+void ObjPtrQueue::print(const char* name) {
+  print(name, _buf, _index, _sz);
+}
+
+void ObjPtrQueue::print(const char* name,
+                        void** buf, size_t index, size_t sz) {
+  gclog_or_tty->print_cr("  SATB BUFFER [%s] buf: "PTR_FORMAT" "
+                         "index: "SIZE_FORMAT" sz: "SIZE_FORMAT,
+                         name, buf, index, sz);
+}
+#endif // PRODUCT
+
 #ifdef ASSERT
 void ObjPtrQueue::verify_oops_in_buffer() {
   if (_buf == NULL) return;
@@ -150,12 +198,9 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(),
-  _closure(NULL), _par_closures(NULL),
-  _shared_satb_queue(this, true /*perm*/)
-{}
+  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                   int process_completed_threshold,
@@ -167,7 +212,6 @@
   }
 }
 
-
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
@@ -228,6 +272,13 @@
   }
 }
 
+void SATBMarkQueueSet::filter_thread_buffers() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().filter();
+  }
+  shared_satb_queue()->filter();
+}
+
 void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
   _closure = closure;
 }
@@ -239,9 +290,9 @@
 
 void SATBMarkQueueSet::iterate_closure_all_threads() {
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure(_closure);
+    t->satb_mark_queue().apply_closure_and_empty(_closure);
   }
-  shared_satb_queue()->apply_closure(_closure);
+  shared_satb_queue()->apply_closure_and_empty(_closure);
 }
 
 void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
@@ -250,7 +301,7 @@
 
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
     if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
     }
   }
 
@@ -264,7 +315,7 @@
 
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure(_par_closures[worker]);
+    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
   }
 }
 
@@ -292,6 +343,61 @@
   }
 }
 
+void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  BufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    nd = nd->next();
+  }
+}
+
+void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(cl);
+  }
+  shared_satb_queue()->apply_closure(cl);
+}
+
+#ifndef PRODUCT
+// Helpful for debugging
+
+#define SATB_PRINTER_BUFFER_SIZE 256
+
+void SATBMarkQueueSet::print_all(const char* msg) {
+  char buffer[SATB_PRINTER_BUFFER_SIZE];
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+
+  gclog_or_tty->cr();
+  gclog_or_tty->print_cr("SATB BUFFERS [%s]", msg);
+
+  BufferNode* nd = _completed_buffers_head;
+  int i = 0;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Enqueued: %d", i);
+    ObjPtrQueue::print(buffer, buf, 0, _sz);
+    nd = nd->next();
+    i += 1;
+  }
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Thread: %s", t->name());
+    t->satb_mark_queue().print(buffer);
+  }
+
+  shared_satb_queue()->print("Shared");
+
+  gclog_or_tty->cr();
+}
+#endif // PRODUCT
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   BufferNode* buffers_to_delete = NULL;
   {
@@ -316,5 +422,5 @@
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
     t->satb_mark_queue().reset();
   }
-  shared_satb_queue()->reset();
+ shared_satb_queue()->reset();
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,26 @@
 
 class ObjectClosure;
 class JavaThread;
+class SATBMarkQueueSet;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class SATBMarkQueueSet;
+
+private:
+  // Filter out unwanted entries from the buffer.
+  void filter();
+
+  // Apply the closure to all elements.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements and empty the buffer;
+  void apply_closure_and_empty(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -41,23 +58,23 @@
     // field to true. This is done in JavaThread::initialize_queues().
     PtrQueue(qset, perm, false /* active */) { }
 
+  // Overrides PtrQueue::flush() so that it can filter the buffer
+  // before it is flushed.
+  virtual void flush();
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
 
-  // Apply the closure to all elements, and reset the index to make the
-  // buffer empty.
-  void apply_closure(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print(const char* name);
+  static void print(const char* name, void** buf, size_t index, size_t sz);
+#endif // PRODUCT
 
   void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
-
-
 class SATBMarkQueueSet: public PtrQueueSet {
   ObjectClosure* _closure;
   ObjectClosure** _par_closures;  // One per ParGCThread.
@@ -88,6 +105,9 @@
   // set itself, has an active value same as expected_active.
   void set_active_all_threads(bool b, bool expected_active);
 
+  // Filter all the currently-active SATB buffers.
+  void filter_thread_buffers();
+
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
   // this closure to a completed buffer, and "iterate_closure_all_threads"
@@ -98,10 +118,9 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)
+  // Apply the registered closure to all entries on each
+  // currently-active buffer and then empty the buffer. It should only
+  // be called serially and at a safepoint.
   void iterate_closure_all_threads();
   // Parallel version of the above.
   void par_iterate_closure_all_threads(int worker);
@@ -117,11 +136,21 @@
     return apply_closure_to_completed_buffer_work(true, worker);
   }
 
+  // Apply the given closure on enqueued and currently-active buffers
+  // respectively. Both methods are read-only, i.e., they do not
+  // modify any of the buffers.
+  void iterate_completed_buffers_read_only(ObjectClosure* cl);
+  void iterate_thread_buffers_read_only(ObjectClosure* cl);
+
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print_all(const char* msg);
+#endif // PRODUCT
+
   ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
 
   // If a marking is being abandoned, reset any unprocessed log buffers.
   void abandon_partial_marking();
-
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
--- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -56,14 +56,14 @@
                           lowest_non_clean_base_chunk_index,
                           lowest_non_clean_chunk_size);
 
-  int n_strides = n_threads * ParGCStridesPerThread;
+  uint n_strides = n_threads * ParGCStridesPerThread;
   SequentialSubTasksDone* pst = sp->par_seq_tasks();
   // Sets the condition for completion of the subtask (how many threads
   // need to finish in order to be done).
   pst->set_n_threads(n_threads);
   pst->set_n_tasks(n_strides);
 
-  int stride = 0;
+  uint stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
     process_stride(sp, mr, stride, n_strides, cl, ct,
                    lowest_non_clean,
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -590,7 +590,7 @@
 // called after a task is started.  So "i" is based on
 // first-come-first-served.
 
-void ParNewGenTask::work(int i) {
+void ParNewGenTask::work(uint worker_id) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   // Since this is being done in a separate thread, need new resource
   // and handle marks.
@@ -601,8 +601,8 @@
 
   Generation* old_gen = gch->next_gen(_gen);
 
-  ParScanThreadState& par_scan_state = _state_set->thread_state(i);
-  assert(_state_set->is_valid(i), "Should not have been called");
+  ParScanThreadState& par_scan_state = _state_set->thread_state(worker_id);
+  assert(_state_set->is_valid(worker_id), "Should not have been called");
 
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
@@ -755,7 +755,7 @@
                          ParScanThreadStateSet& state_set);
 
 private:
-  virtual void work(int i);
+  virtual void work(uint worker_id);
   virtual void set_for_termination(int active_workers) {
     _state_set.terminator()->reset_for_reuse(active_workers);
   }
@@ -781,13 +781,13 @@
 {
 }
 
-void ParNewRefProcTaskProxy::work(int i)
+void ParNewRefProcTaskProxy::work(uint worker_id)
 {
   ResourceMark rm;
   HandleMark hm;
-  ParScanThreadState& par_scan_state = _state_set.thread_state(i);
+  ParScanThreadState& par_scan_state = _state_set.thread_state(worker_id);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
-  _task.work(i, par_scan_state.is_alive_closure(),
+  _task.work(worker_id, par_scan_state.is_alive_closure(),
              par_scan_state.keep_alive_closure(),
              par_scan_state.evacuate_followers_closure());
 }
@@ -802,9 +802,9 @@
       _task(task)
   { }
 
-  virtual void work(int i)
+  virtual void work(uint worker_id)
   {
-    _task.work(i);
+    _task.work(worker_id);
   }
 };
 
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -239,7 +239,7 @@
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
-  void work(int i);
+  void work(uint worker_id);
 
   // Reset the terminator in ParScanThreadStateSet for
   // "active_workers" threads.
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -282,7 +282,7 @@
     // large page can be broken down if we require small pages.
     os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
     // Then we uncommit the pages in the range.
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
     // And make them local/first-touch biased.
     os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
   }
@@ -297,7 +297,7 @@
     assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
            (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
     assert(region().contains(aligned_region), "Sanity");
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
   }
 }
 
@@ -954,7 +954,7 @@
     if (e != scan_end) {
       if ((page_expected.size != page_size || page_expected.lgrp_id != lgrp_id())
           && page_expected.size != 0) {
-        os::free_memory(s, pointer_delta(e, s, sizeof(char)));
+        os::free_memory(s, pointer_delta(e, s, sizeof(char)), page_size);
       }
       page_expected = page_found;
     }
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -51,7 +51,7 @@
       size_t size = pointer_delta(end, start, sizeof(char));
       if (clear_space) {
         // Prefer page reallocation to migration.
-        os::free_memory((char*)start, size);
+        os::free_memory((char*)start, size, page_size);
       }
       os::numa_make_global((char*)start, size);
     }
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -478,18 +478,22 @@
 void CollectedHeap::test_is_in() {
   CollectedHeap* heap = Universe::heap();
 
+  uintptr_t epsilon    = (uintptr_t) MinObjAlignment;
+  uintptr_t heap_start = (uintptr_t) heap->_reserved.start();
+  uintptr_t heap_end   = (uintptr_t) heap->_reserved.end();
+
   // Test that NULL is not in the heap.
   assert(!heap->is_in(NULL), "NULL is unexpectedly in the heap");
 
   // Test that a pointer to before the heap start is reported as outside the heap.
-  assert(heap->_reserved.start() >= (void*)MinObjAlignment, "sanity");
-  void* before_heap = (void*)((intptr_t)heap->_reserved.start() - MinObjAlignment);
+  assert(heap_start >= ((uintptr_t)NULL + epsilon), "sanity");
+  void* before_heap = (void*)(heap_start - epsilon);
   assert(!heap->is_in(before_heap),
       err_msg("before_heap: " PTR_FORMAT " is unexpectedly in the heap", before_heap));
 
   // Test that a pointer to after the heap end is reported as outside the heap.
-  assert(heap->_reserved.end() <= (void*)(uintptr_t(-1) - (uint)MinObjAlignment), "sanity");
-  void* after_heap = (void*)((intptr_t)heap->_reserved.end() + MinObjAlignment);
+  assert(heap_end <= ((uintptr_t)-1 - epsilon), "sanity");
+  void* after_heap = (void*)(heap_end + epsilon);
   assert(!heap->is_in(after_heap),
       err_msg("after_heap: " PTR_FORMAT " is unexpectedly in the heap", after_heap));
 }
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -69,7 +69,7 @@
   MemRegion _reserved;
   BarrierSet* _barrier_set;
   bool _is_gc_active;
-  int _n_par_threads;
+  uint _n_par_threads;
 
   unsigned int _total_collections;          // ... started
   unsigned int _total_full_collections;     // ... started
@@ -309,10 +309,10 @@
   GCCause::Cause gc_cause() { return _gc_cause; }
 
   // Number of threads currently working on GC tasks.
-  int n_par_threads() { return _n_par_threads; }
+  uint n_par_threads() { return _n_par_threads; }
 
   // May be overridden to set additional parallelism.
-  virtual void set_par_threads(int t) { _n_par_threads = t; };
+  virtual void set_par_threads(uint t) { _n_par_threads = t; };
 
   // Preload classes into the shared portion of the heap, and then dump
   // that data to a file so that it can be loaded directly by another
--- a/hotspot/src/share/vm/memory/dump.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/dump.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1402,7 +1402,7 @@
         instanceKlass* ik = (instanceKlass*) k;
         // Link the class to cause the bytecodes to be rewritten and the
         // cpcache to be created.
-        if (ik->get_init_state() < instanceKlass::linked) {
+        if (ik->init_state() < instanceKlass::linked) {
           ik->link_class(THREAD);
           guarantee(!HAS_PENDING_EXCEPTION, "exception in class rewriting");
         }
@@ -1535,7 +1535,7 @@
         // are loaded in order that the related data structures (klass,
         // cpCache, Sting constants) are located together.
 
-        if (ik->get_init_state() < instanceKlass::linked) {
+        if (ik->init_state() < instanceKlass::linked) {
           ik->link_class(THREAD);
           guarantee(!(HAS_PENDING_EXCEPTION), "exception in class rewriting");
         }
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -703,7 +703,7 @@
   return collector_policy()->satisfy_failed_allocation(size, is_tlab);
 }
 
-void GenCollectedHeap::set_par_threads(int t) {
+void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
   _gen_process_strong_tasks->set_n_threads(t);
 }
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -419,8 +419,7 @@
   // asserted to be this type.
   static GenCollectedHeap* heap();
 
-  void set_par_threads(int t);
-
+  void set_par_threads(uint t);
 
   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -88,9 +88,9 @@
 
 ReferenceProcessor::ReferenceProcessor(MemRegion span,
                                        bool      mt_processing,
-                                       int       mt_processing_degree,
+                                       uint      mt_processing_degree,
                                        bool      mt_discovery,
-                                       int       mt_discovery_degree,
+                                       uint      mt_discovery_degree,
                                        bool      atomic_discovery,
                                        BoolObjectClosure* is_alive_non_header,
                                        bool      discovered_list_needs_barrier)  :
@@ -105,7 +105,7 @@
   _span = span;
   _discovery_is_atomic = atomic_discovery;
   _discovery_is_mt     = mt_discovery;
-  _num_q               = MAX2(1, mt_processing_degree);
+  _num_q               = MAX2(1U, mt_processing_degree);
   _max_num_q           = MAX2(_num_q, mt_discovery_degree);
   _discovered_refs     = NEW_C_HEAP_ARRAY(DiscoveredList,
                                           _max_num_q * number_of_subclasses_of_ref());
@@ -118,7 +118,7 @@
   _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q];
 
   // Initialize all entries to NULL
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     _discovered_refs[i].set_head(NULL);
     _discovered_refs[i].set_length(0);
   }
@@ -133,7 +133,7 @@
 #ifndef PRODUCT
 void ReferenceProcessor::verify_no_references_recorded() {
   guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     guarantee(_discovered_refs[i].is_empty(),
               "Found non-empty discovered list");
   }
@@ -141,7 +141,7 @@
 #endif
 
 void ReferenceProcessor::weak_oops_do(OopClosure* f) {
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (UseCompressedOops) {
       f->do_oop((narrowOop*)_discovered_refs[i].adr_head());
     } else {
@@ -437,7 +437,7 @@
     task_executor->execute(tsk);
   } else {
     // Serial code: call the parent class's implementation
-    for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+    for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
       enqueue_discovered_reflist(_discovered_refs[i], pending_list_addr);
       _discovered_refs[i].set_head(NULL);
       _discovered_refs[i].set_length(0);
@@ -696,7 +696,7 @@
 
 void ReferenceProcessor::abandon_partial_discovery() {
   // loop over the lists
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
       gclog_or_tty->print_cr("\nAbandoning %s discovered list", list_name(i));
     }
@@ -787,7 +787,7 @@
     gclog_or_tty->print_cr("\nBalance ref_lists ");
   }
 
-  for (int i = 0; i < _max_num_q; ++i) {
+  for (uint i = 0; i < _max_num_q; ++i) {
     total_refs += ref_lists[i].length();
     if (TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print("%d ", ref_lists[i].length());
@@ -797,8 +797,8 @@
     gclog_or_tty->print_cr(" = %d", total_refs);
   }
   size_t avg_refs = total_refs / _num_q + 1;
-  int to_idx = 0;
-  for (int from_idx = 0; from_idx < _max_num_q; from_idx++) {
+  uint to_idx = 0;
+  for (uint from_idx = 0; from_idx < _max_num_q; from_idx++) {
     bool move_all = false;
     if (from_idx >= _num_q) {
       move_all = ref_lists[from_idx].length() > 0;
@@ -857,7 +857,7 @@
   }
 #ifdef ASSERT
   size_t balanced_total_refs = 0;
-  for (int i = 0; i < _max_num_q; ++i) {
+  for (uint i = 0; i < _max_num_q; ++i) {
     balanced_total_refs += ref_lists[i].length();
     if (TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print("%d ", ref_lists[i].length());
@@ -903,7 +903,7 @@
   }
   if (PrintReferenceGC && PrintGCDetails) {
     size_t total = 0;
-    for (int i = 0; i < _max_num_q; ++i) {
+    for (uint i = 0; i < _max_num_q; ++i) {
       total += refs_lists[i].length();
     }
     gclog_or_tty->print(", %u refs", total);
@@ -919,7 +919,7 @@
       RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/);
       task_executor->execute(phase1);
     } else {
-      for (int i = 0; i < _max_num_q; i++) {
+      for (uint i = 0; i < _max_num_q; i++) {
         process_phase1(refs_lists[i], policy,
                        is_alive, keep_alive, complete_gc);
       }
@@ -935,7 +935,7 @@
     RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/);
     task_executor->execute(phase2);
   } else {
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc);
     }
   }
@@ -946,7 +946,7 @@
     RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/);
     task_executor->execute(phase3);
   } else {
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       process_phase3(refs_lists[i], clear_referent,
                      is_alive, keep_alive, complete_gc);
     }
@@ -955,7 +955,7 @@
 
 void ReferenceProcessor::clean_up_discovered_references() {
   // loop over the lists
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
       gclog_or_tty->print_cr(
         "\nScrubbing %s discovered list of Null referents",
@@ -1000,7 +1000,7 @@
 }
 
 inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt) {
-  int id = 0;
+  uint id = 0;
   // Determine the queue index to use for this object.
   if (_discovery_is_mt) {
     // During a multi-threaded discovery phase,
@@ -1282,7 +1282,7 @@
   {
     TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1295,7 +1295,7 @@
   {
     TraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1308,7 +1308,7 @@
   {
     TraceTime tt("Preclean FinalReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1321,7 +1321,7 @@
   {
     TraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1386,7 +1386,7 @@
   )
 }
 
-const char* ReferenceProcessor::list_name(int i) {
+const char* ReferenceProcessor::list_name(uint i) {
    assert(i >= 0 && i <= _max_num_q * number_of_subclasses_of_ref(),
           "Out of bounds index");
 
@@ -1410,7 +1410,7 @@
 #ifndef PRODUCT
 void ReferenceProcessor::clear_discovered_references() {
   guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     clear_discovered_references(_discovered_refs[i]);
   }
 }
--- a/hotspot/src/share/vm/memory/referenceProcessor.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -231,7 +231,7 @@
   bool        _enqueuing_is_done;       // true if all weak references enqueued
   bool        _processing_is_mt;        // true during phases when
                                         // reference processing is MT.
-  int         _next_id;                 // round-robin mod _num_q counter in
+  uint        _next_id;                 // round-robin mod _num_q counter in
                                         // support of work distribution
 
   // For collectors that do not keep GC liveness information
@@ -252,9 +252,9 @@
   // The discovered ref lists themselves
 
   // The active MT'ness degree of the queues below
-  int             _num_q;
+  uint             _num_q;
   // The maximum MT'ness degree of the queues below
-  int             _max_num_q;
+  uint             _max_num_q;
 
   // Master array of discovered oops
   DiscoveredList* _discovered_refs;
@@ -268,9 +268,9 @@
  public:
   static int number_of_subclasses_of_ref() { return (REF_PHANTOM - REF_OTHER); }
 
-  int num_q()                              { return _num_q; }
-  int max_num_q()                          { return _max_num_q; }
-  void set_active_mt_degree(int v)         { _num_q = v; }
+  uint num_q()                             { return _num_q; }
+  uint max_num_q()                         { return _max_num_q; }
+  void set_active_mt_degree(uint v)        { _num_q = v; }
 
   DiscoveredList* discovered_refs()        { return _discovered_refs; }
 
@@ -368,7 +368,7 @@
 
   // Returns the name of the discovered reference list
   // occupying the i / _num_q slot.
-  const char* list_name(int i);
+  const char* list_name(uint i);
 
   void enqueue_discovered_reflists(HeapWord* pending_list_addr, AbstractRefProcTaskExecutor* task_executor);
 
@@ -388,8 +388,8 @@
                                    YieldClosure*      yield);
 
   // round-robin mod _num_q (not: _not_ mode _max_num_q)
-  int next_id() {
-    int id = _next_id;
+  uint next_id() {
+    uint id = _next_id;
     if (++_next_id == _num_q) {
       _next_id = 0;
     }
@@ -434,8 +434,8 @@
 
   // Default parameters give you a vanilla reference processor.
   ReferenceProcessor(MemRegion span,
-                     bool mt_processing = false, int mt_processing_degree = 1,
-                     bool mt_discovery  = false, int mt_discovery_degree  = 1,
+                     bool mt_processing = false, uint mt_processing_degree = 1,
+                     bool mt_discovery  = false, uint mt_discovery_degree  = 1,
                      bool atomic_discovery = true,
                      BoolObjectClosure* is_alive_non_header = NULL,
                      bool discovered_list_needs_barrier = false);
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -94,7 +94,7 @@
              && _thread_holds_heap_lock_for_gc);
 }
 
-void SharedHeap::set_par_threads(int t) {
+void SharedHeap::set_par_threads(uint t) {
   assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
   _n_par_threads = t;
   _process_strong_tasks->set_n_threads(t);
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/memory/sharedHeap.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -287,7 +287,7 @@
 
   // Sets the number of parallel threads that will be doing tasks
   // (such as process strong roots) subsequently.
-  virtual void set_par_threads(int t);
+  virtual void set_par_threads(uint t);
 
   int n_termination();
   void set_n_termination(int t);
--- a/hotspot/src/share/vm/oops/arrayKlass.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/arrayKlass.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -73,7 +73,7 @@
   oop* adr_component_mirror()           { return (oop*)&this->_component_mirror;}
 
   // Compiler/Interpreter offset
-  static ByteSize component_mirror_offset() { return byte_offset_of(arrayKlass, _component_mirror); }
+  static ByteSize component_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(arrayKlass, _component_mirror)); }
 
   virtual klassOop java_super() const;//{ return SystemDictionary::Object_klass(); }
 
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -208,7 +208,7 @@
   // abort if someone beat us to the initialization
   if (!this_oop->is_not_initialized()) return;  // note: not equivalent to is_initialized()
 
-  ClassState old_state = this_oop->_init_state;
+  ClassState old_state = this_oop->init_state();
   link_class_impl(this_oop, true, THREAD);
   if (HAS_PENDING_EXCEPTION) {
     CLEAR_PENDING_EXCEPTION;
@@ -2479,7 +2479,7 @@
   bool good_state = as_klassOop()->is_shared() ? (_init_state <= state)
                                                : (_init_state < state);
   assert(good_state || state == allocated, "illegal state transition");
-  _init_state = state;
+  _init_state = (u1)state;
 }
 #endif
 
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -227,16 +227,16 @@
   // (including inherited fields but after header_size()).
   int             _nonstatic_field_size;
   int             _static_field_size;    // number words used by static fields (oop and non-oop) in this klass
-  int             _static_oop_field_count;// number of static oop fields in this klass
+  u2              _static_oop_field_count;// number of static oop fields in this klass
+  u2              _java_fields_count;    // The number of declared Java fields
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
-  int             _java_fields_count;    // The number of declared Java fields
+
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
   bool            _rewritten;            // methods rewritten.
   bool            _has_nonstatic_fields; // for sizing with UseCompressedOops
   bool            _should_verify_class;  // allow caching of preverification
   u2              _minor_version;        // minor version number of class file
   u2              _major_version;        // major version number of class file
-  ClassState      _init_state;           // state of class
   Thread*         _init_thread;          // Pointer to current thread doing initialization (to handle recusive initialization)
   int             _vtable_len;           // length of Java vtable (in words)
   int             _itable_len;           // length of Java itable (in words)
@@ -260,6 +260,11 @@
   JvmtiCachedClassFieldMap* _jvmti_cached_class_field_map;  // JVMTI: used during heap iteration
   volatile u2     _idnum_allocated_count;         // JNI/JVMTI: increments with the addition of methods, old ids don't change
 
+  // Class states are defined as ClassState (see above).
+  // Place the _init_state here to utilize the unused 2-byte after
+  // _idnum_allocated_count.
+  u1              _init_state;                    // state of class
+
   // embedded Java vtable follows here
   // embedded Java itables follows here
   // embedded static fields follows here
@@ -279,8 +284,8 @@
   int static_field_size() const            { return _static_field_size; }
   void set_static_field_size(int size)     { _static_field_size = size; }
 
-  int static_oop_field_count() const        { return _static_oop_field_count; }
-  void set_static_oop_field_count(int size) { _static_oop_field_count = size; }
+  int static_oop_field_count() const       { return (int)_static_oop_field_count; }
+  void set_static_oop_field_count(u2 size) { _static_oop_field_count = size; }
 
   // Java vtable
   int  vtable_length() const               { return _vtable_len; }
@@ -320,14 +325,14 @@
   Symbol* field_signature   (int index) const { return field(index)->signature(constants()); }
 
   // Number of Java declared fields
-  int java_fields_count() const           { return _java_fields_count; }
+  int java_fields_count() const           { return (int)_java_fields_count; }
 
   // Number of fields including any injected fields
   int all_fields_count() const            { return _fields->length() / sizeof(FieldInfo::field_slots); }
 
   typeArrayOop fields() const              { return _fields; }
 
-  void set_fields(typeArrayOop f, int java_fields_count) {
+  void set_fields(typeArrayOop f, u2 java_fields_count) {
     oop_store_without_check((oop*) &_fields, (oop) f);
     _java_fields_count = java_fields_count;
   }
@@ -377,7 +382,7 @@
   bool is_being_initialized() const        { return _init_state == being_initialized; }
   bool is_in_error_state() const           { return _init_state == initialization_error; }
   bool is_reentrant_initialization(Thread *thread)  { return thread == _init_thread; }
-  int  get_init_state()                    { return _init_state; } // Useful for debugging
+  ClassState  init_state()                 { return (ClassState)_init_state; }
   bool is_rewritten() const                { return _rewritten; }
 
   // defineClass specified verification
@@ -405,7 +410,7 @@
   ReferenceType reference_type() const     { return _reference_type; }
   void set_reference_type(ReferenceType t) { _reference_type = t; }
 
-  static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); }
+  static ByteSize reference_type_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _reference_type)); }
 
   // find local field, returns true if found
   bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const;
@@ -616,8 +621,8 @@
   void set_breakpoints(BreakpointInfo* bps) { _breakpoints = bps; };
 
   // support for stub routines
-  static int init_state_offset_in_bytes()    { return offset_of(instanceKlass, _init_state); }
-  static int init_thread_offset_in_bytes()   { return offset_of(instanceKlass, _init_thread); }
+  static ByteSize init_state_offset()  { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); }
+  static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); }
 
   // subclass/subinterface checks
   bool implements_interface(klassOop k) const;
@@ -754,7 +759,7 @@
 #ifdef ASSERT
   void set_init_state(ClassState state);
 #else
-  void set_init_state(ClassState state) { _init_state = state; }
+  void set_init_state(ClassState state) { _init_state = (u1)state; }
 #endif
   void set_rewritten()                  { _rewritten = true; }
   void set_init_thread(Thread *thread)  { _init_thread = thread; }
--- a/hotspot/src/share/vm/oops/klass.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/klass.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -144,7 +144,7 @@
     }
     kl->set_secondary_supers(NULL);
     oop_store_without_check((oop*) &kl->_primary_supers[0], k);
-    kl->set_super_check_offset(primary_supers_offset_in_bytes() + sizeof(oopDesc));
+    kl->set_super_check_offset(in_bytes(primary_supers_offset()));
   }
 
   kl->set_java_mirror(NULL);
--- a/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -313,7 +313,7 @@
   // Can this klass be a primary super?  False for interfaces and arrays of
   // interfaces.  False also for arrays or classes with long super chains.
   bool can_be_primary_super() const {
-    const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+    const juint secondary_offset = in_bytes(secondary_super_cache_offset());
     return super_check_offset() != secondary_offset;
   }
   virtual bool can_be_primary_super_slow() const;
@@ -323,7 +323,7 @@
     if (!can_be_primary_super()) {
       return primary_super_limit();
     } else {
-      juint d = (super_check_offset() - (primary_supers_offset_in_bytes() + sizeof(oopDesc))) / sizeof(klassOop);
+      juint d = (super_check_offset() - in_bytes(primary_supers_offset())) / sizeof(klassOop);
       assert(d < primary_super_limit(), "oob");
       assert(_primary_supers[d] == as_klassOop(), "proper init");
       return d;
@@ -373,15 +373,15 @@
   virtual void set_alloc_size(juint n) = 0;
 
   // Compiler support
-  static int super_offset_in_bytes()         { return offset_of(Klass, _super); }
-  static int super_check_offset_offset_in_bytes() { return offset_of(Klass, _super_check_offset); }
-  static int primary_supers_offset_in_bytes(){ return offset_of(Klass, _primary_supers); }
-  static int secondary_super_cache_offset_in_bytes() { return offset_of(Klass, _secondary_super_cache); }
-  static int secondary_supers_offset_in_bytes() { return offset_of(Klass, _secondary_supers); }
-  static int java_mirror_offset_in_bytes()   { return offset_of(Klass, _java_mirror); }
-  static int modifier_flags_offset_in_bytes(){ return offset_of(Klass, _modifier_flags); }
-  static int layout_helper_offset_in_bytes() { return offset_of(Klass, _layout_helper); }
-  static int access_flags_offset_in_bytes()  { return offset_of(Klass, _access_flags); }
+  static ByteSize super_offset()                 { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super)); }
+  static ByteSize super_check_offset_offset()    { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super_check_offset)); }
+  static ByteSize primary_supers_offset()        { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _primary_supers)); }
+  static ByteSize secondary_super_cache_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_super_cache)); }
+  static ByteSize secondary_supers_offset()      { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_supers)); }
+  static ByteSize java_mirror_offset()           { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _java_mirror)); }
+  static ByteSize modifier_flags_offset()        { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _modifier_flags)); }
+  static ByteSize layout_helper_offset()         { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _layout_helper)); }
+  static ByteSize access_flags_offset()          { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _access_flags)); }
 
   // Unpacking layout_helper:
   enum {
@@ -478,7 +478,7 @@
   bool is_subtype_of(klassOop k) const {
     juint    off = k->klass_part()->super_check_offset();
     klassOop sup = *(klassOop*)( (address)as_klassOop() + off );
-    const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+    const juint secondary_offset = in_bytes(secondary_super_cache_offset());
     if (sup == k) {
       return true;
     } else if (off != secondary_offset) {
@@ -674,7 +674,7 @@
   // are potential problems in setting the bias pattern for
   // JVM-internal oops.
   inline void set_prototype_header(markOop header);
-  static int prototype_header_offset_in_bytes() { return offset_of(Klass, _prototype_header); }
+  static ByteSize prototype_header_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _prototype_header)); }
 
   int  biased_lock_revocation_count() const { return (int) _biased_lock_revocation_count; }
   // Atomically increments biased_lock_revocation_count and returns updated value
--- a/hotspot/src/share/vm/oops/klassOop.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/klassOop.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -38,14 +38,8 @@
 
 class klassOopDesc : public oopDesc {
  public:
-  // size operation
-  static int header_size()                       { return sizeof(klassOopDesc)/HeapWordSize; }
-
-  // support for code generation
-  static int klass_part_offset_in_bytes()        { return sizeof(klassOopDesc); }
-
   // returns the Klass part containing dispatching behavior
-  Klass* klass_part() const                      { return (Klass*)((address)this + klass_part_offset_in_bytes()); }
+  Klass* klass_part() const                      { return (Klass*)((address)this + sizeof(klassOopDesc)); }
 
   // Convenience wrapper
   inline oop java_mirror() const;
--- a/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -47,7 +47,7 @@
   oop* bottom_klass_addr()            { return (oop*)&_bottom_klass; }
 
   // Compiler/Interpreter offset
-  static int element_klass_offset_in_bytes() { return offset_of(objArrayKlass, _element_klass); }
+  static ByteSize element_klass_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(objArrayKlass, _element_klass)); }
 
   // Dispatched operation
   bool can_be_primary_super_slow() const;
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -426,6 +426,9 @@
   product(bool, EliminateLocks, true,                                       \
           "Coarsen locks when possible")                                    \
                                                                             \
+  product(bool, EliminateNestedLocks, true,                                 \
+          "Eliminate nested locks of the same object when possible")        \
+                                                                            \
   notproduct(bool, PrintLockStatistics, false,                              \
           "Print precise statistics on the dynamic lock usage")             \
                                                                             \
--- a/hotspot/src/share/vm/opto/callnode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -400,10 +400,10 @@
       Node *box = mcall->monitor_box(this, i);
       Node *obj = mcall->monitor_obj(this, i);
       if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
-        while( !box->is_BoxLock() )  box = box->in(1);
+        box = BoxLockNode::box_node(box);
         format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs );
       } else {
-        OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+        OptoReg::Name box_reg = BoxLockNode::reg(box);
         st->print(" MON-BOX%d=%s+%d",
                    i,
                    OptoReg::regname(OptoReg::c_frame_pointer),
@@ -411,8 +411,7 @@
       }
       const char* obj_msg = "MON-OBJ[";
       if (EliminateLocks) {
-        while( !box->is_BoxLock() )  box = box->in(1);
-        if (box->as_BoxLock()->is_eliminated())
+        if (BoxLockNode::box_node(box)->is_eliminated())
           obj_msg = "MON-OBJ(LOCK ELIMINATED)[";
       }
       format_helper( regalloc, st, obj, obj_msg, i, &scobjs );
@@ -1387,8 +1386,9 @@
     Node *n = ctrl_proj->in(0);
     if (n != NULL && n->is_Unlock()) {
       UnlockNode *unlock = n->as_Unlock();
-      if ((lock->obj_node() == unlock->obj_node()) &&
-          (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+      if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+          BoxLockNode::same_slot(lock->box_node(), unlock->box_node()) &&
+          !unlock->is_eliminated()) {
         lock_ops.append(unlock);
         return true;
       }
@@ -1431,8 +1431,8 @@
   }
   if (ctrl->is_Lock()) {
     LockNode *lock = ctrl->as_Lock();
-    if ((lock->obj_node() == unlock->obj_node()) &&
-            (lock->box_node() == unlock->box_node())) {
+    if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+        BoxLockNode::same_slot(lock->box_node(), unlock->box_node())) {
       lock_result = lock;
     }
   }
@@ -1462,8 +1462,9 @@
       }
       if (lock1_node != NULL && lock1_node->is_Lock()) {
         LockNode *lock1 = lock1_node->as_Lock();
-        if ((lock->obj_node() == lock1->obj_node()) &&
-            (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+        if (lock->obj_node()->eqv_uncast(lock1->obj_node()) &&
+            BoxLockNode::same_slot(lock->box_node(), lock1->box_node()) &&
+            !lock1->is_eliminated()) {
           lock_ops.append(lock1);
           return true;
         }
@@ -1507,19 +1508,16 @@
 void AbstractLockNode::create_lock_counter(JVMState* state) {
   _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
 }
-#endif
 
-void AbstractLockNode::set_eliminated() {
-  _eliminate = true;
-#ifndef PRODUCT
+void AbstractLockNode::set_eliminated_lock_counter() {
   if (_counter) {
     // Update the counter to indicate that this lock was eliminated.
     // The counter update code will stay around even though the
     // optimizer will eliminate the lock operation itself.
     _counter->set_tag(NamedCounter::EliminatedLockCounter);
   }
+}
 #endif
-}
 
 //=============================================================================
 Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
@@ -1535,7 +1533,7 @@
   // prevents macro expansion from expanding the lock.  Since we don't
   // modify the graph, the value returned from this function is the
   // one computed above.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
@@ -1544,16 +1542,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
       return result;
     }
 
@@ -1613,8 +1606,7 @@
         for (int i = 0; i < lock_ops.length(); i++) {
           AbstractLockNode* lock = lock_ops.at(i);
 
-          // Mark it eliminated to update any counters
-          lock->set_eliminated();
+          // Mark it eliminated by coarsening and update any counters
           lock->set_coarsened();
         }
       } else if (ctrl->is_Region() &&
@@ -1632,6 +1624,40 @@
 }
 
 //=============================================================================
+bool LockNode::is_nested_lock_region() {
+  BoxLockNode* box = box_node()->as_BoxLock();
+  int stk_slot = box->stack_slot();
+  if (stk_slot <= 0)
+    return false; // External lock or it is not Box (Phi node).
+
+  // Ignore complex cases: merged locks or multiple locks.
+  Node* obj = obj_node();
+  LockNode* unique_lock = NULL;
+  if (!box->is_simple_lock_region(&unique_lock, obj) ||
+      (unique_lock != this)) {
+    return false;
+  }
+
+  // Look for external lock for the same object.
+  SafePointNode* sfn = this->as_SafePoint();
+  JVMState* youngest_jvms = sfn->jvms();
+  int max_depth = youngest_jvms->depth();
+  for (int depth = 1; depth <= max_depth; depth++) {
+    JVMState* jvms = youngest_jvms->of_depth(depth);
+    int num_mon  = jvms->nof_monitors();
+    // Loop over monitors
+    for (int idx = 0; idx < num_mon; idx++) {
+      Node* obj_node = sfn->monitor_obj(jvms, idx);
+      BoxLockNode* box_node = sfn->monitor_box(jvms, idx)->as_BoxLock();
+      if ((box_node->stack_slot() < stk_slot) && obj_node->eqv_uncast(obj)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+//=============================================================================
 uint UnlockNode::size_of() const { return sizeof(*this); }
 
 //=============================================================================
@@ -1649,7 +1675,7 @@
   // modify the graph, the value returned from this function is the
   // one computed above.
   // Escape state is defined after Parse phase.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
@@ -1658,16 +1684,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
     }
   }
   return result;
--- a/hotspot/src/share/vm/opto/callnode.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -791,6 +791,10 @@
   // are defined in graphKit.cpp, which sets up the bidirectional relation.)
   InitializeNode* initialization();
 
+  // Return the corresponding storestore barrier (or null if none).
+  // Walks out edges to find it...
+  MemBarStoreStoreNode* storestore();
+
   // Convenience for initialization->maybe_set_complete(phase)
   bool maybe_set_complete(PhaseGVN* phase);
 };
@@ -836,8 +840,12 @@
 //------------------------------AbstractLockNode-----------------------------------
 class AbstractLockNode: public CallNode {
 private:
-  bool _eliminate;    // indicates this lock can be safely eliminated
-  bool _coarsened;    // indicates this lock was coarsened
+  enum {
+    Regular = 0,  // Normal lock
+    NonEscObj,    // Lock is used for non escaping object
+    Coarsened,    // Lock was coarsened
+    Nested        // Nested lock
+  } _kind;
 #ifndef PRODUCT
   NamedCounter* _counter;
 #endif
@@ -854,12 +862,13 @@
                                GrowableArray<AbstractLockNode*> &lock_ops);
   LockNode *find_matching_lock(UnlockNode* unlock);
 
+  // Update the counter to indicate that this lock was eliminated.
+  void set_eliminated_lock_counter() PRODUCT_RETURN;
 
 public:
   AbstractLockNode(const TypeFunc *tf)
     : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
-      _coarsened(false),
-      _eliminate(false)
+      _kind(Regular)
   {
 #ifndef PRODUCT
     _counter = NULL;
@@ -869,20 +878,23 @@
   Node *   obj_node() const       {return in(TypeFunc::Parms + 0); }
   Node *   box_node() const       {return in(TypeFunc::Parms + 1); }
   Node *   fastlock_node() const  {return in(TypeFunc::Parms + 2); }
+  void     set_box_node(Node* box) { set_req(TypeFunc::Parms + 1, box); }
+
   const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
 
   virtual uint size_of() const { return sizeof(*this); }
 
-  bool is_eliminated()         {return _eliminate; }
-  // mark node as eliminated and update the counter if there is one
-  void set_eliminated();
+  bool is_eliminated()  const { return (_kind != Regular); }
+  bool is_non_esc_obj() const { return (_kind == NonEscObj); }
+  bool is_coarsened()   const { return (_kind == Coarsened); }
+  bool is_nested()      const { return (_kind == Nested); }
 
-  bool is_coarsened()  { return _coarsened; }
-  void set_coarsened() { _coarsened = true; }
-  void clear_coarsened() { _coarsened = false; }
+  void set_non_esc_obj() { _kind = NonEscObj; set_eliminated_lock_counter(); }
+  void set_coarsened()   { _kind = Coarsened; set_eliminated_lock_counter(); }
+  void set_nested()      { _kind = Nested; set_eliminated_lock_counter(); }
 
   // locking does not modify its arguments
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
+  virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
 
 #ifndef PRODUCT
   void create_lock_counter(JVMState* s);
@@ -932,6 +944,8 @@
   virtual void  clone_jvms() {
     set_jvms(jvms()->clone_deep(Compile::current()));
   }
+
+  bool is_nested_lock_region(); // Is this Lock nested?
 };
 
 //------------------------------Unlock---------------------------------------
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1597,7 +1597,7 @@
       bool is_loop = (r->is_Loop() && r->req() == 3);
       // Then, check if there is a data loop when phi references itself directly
       // or through other data nodes.
-      if (is_loop && !phase->eqv_uncast(uin, in(LoopNode::EntryControl)) ||
+      if (is_loop && !uin->eqv_uncast(in(LoopNode::EntryControl)) ||
          !is_loop && is_unsafe_data_reference(uin)) {
         // Break this data loop to avoid creation of a dead loop.
         if (can_reshape) {
--- a/hotspot/src/share/vm/opto/chaitin.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/chaitin.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -485,7 +485,11 @@
     return yank_if_dead(old, current_block, &value, &regnd);
   }
 
-  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
+  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+    return yank_if_dead_recurse(old, old, current_block, value, regnd);
+  }
+  int yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                           Node_List *value, Node_List *regnd);
   int yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
   int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
   int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
--- a/hotspot/src/share/vm/opto/classes.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/classes.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -166,6 +166,7 @@
 macro(MemBarRelease)
 macro(MemBarReleaseLock)
 macro(MemBarVolatile)
+macro(MemBarStoreStore)
 macro(MergeMem)
 macro(MinI)
 macro(ModD)
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1282,12 +1282,11 @@
   if( tk ) {
     // If we are referencing a field within a Klass, we need
     // to assume the worst case of an Object.  Both exact and
-    // inexact types must flatten to the same alias class.
-    // Since the flattened result for a klass is defined to be
-    // precisely java.lang.Object, use a constant ptr.
+    // inexact types must flatten to the same alias class so
+    // use NotNull as the PTR.
     if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
 
-      tj = tk = TypeKlassPtr::make(TypePtr::Constant,
+      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
                                    TypeKlassPtr::OBJECT->klass(),
                                    offset);
     }
@@ -1307,10 +1306,12 @@
     // these 2 disparate memories into the same alias class.  Since the
     // primary supertype array is read-only, there's no chance of confusion
     // where we bypass an array load and an array store.
-    uint off2 = offset - Klass::primary_supers_offset_in_bytes();
-    if( offset == Type::OffsetBot ||
-        off2 < Klass::primary_super_limit()*wordSize ) {
-      offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
+    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
+    if (offset == Type::OffsetBot ||
+        (offset >= primary_supers_offset &&
+         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
+        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
+      offset = in_bytes(Klass::secondary_super_cache_offset());
       tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
     }
   }
@@ -1489,13 +1490,13 @@
         alias_type(idx)->set_rewritable(false);
     }
     if (flat->isa_klassptr()) {
-      if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::access_flags_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
         alias_type(idx)->set_rewritable(false);
     }
     // %%% (We would like to finalize JavaThread::threadObj_offset(),
@@ -2521,7 +2522,7 @@
             break;
           }
         }
-        assert(p != NULL, "must be found");
+        assert(proj != NULL, "must be found");
         p->subsume_by(proj);
       }
     }
--- a/hotspot/src/share/vm/opto/escape.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/escape.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1595,6 +1595,7 @@
   GrowableArray<Node*> alloc_worklist;
   GrowableArray<Node*> addp_worklist;
   GrowableArray<Node*> ptr_cmp_worklist;
+  GrowableArray<Node*> storestore_worklist;
   PhaseGVN* igvn = _igvn;
 
   // Push all useful nodes onto CG list and set their type.
@@ -1618,6 +1619,11 @@
                (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
       // Compare pointers nodes
       ptr_cmp_worklist.append(n);
+    } else if (n->is_MemBarStoreStore()) {
+      // Collect all MemBarStoreStore nodes so that depending on the
+      // escape status of the associated Allocate node some of them
+      // may be eliminated.
+      storestore_worklist.append(n);
     }
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* m = n->fast_out(i);   // Get user
@@ -1724,11 +1730,20 @@
   uint alloc_length = alloc_worklist.length();
   for (uint next = 0; next < alloc_length; ++next) {
     Node* n = alloc_worklist.at(next);
-    if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
+    PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state();
+    if (es == PointsToNode::NoEscape) {
       has_non_escaping_obj = true;
       if (n->is_Allocate()) {
         find_init_values(n, &visited, igvn);
+        // The object allocated by this Allocate node will never be
+        // seen by an other thread. Mark it so that when it is
+        // expanded no MemBarStoreStore is added.
+        n->as_Allocate()->initialization()->set_does_not_escape();
       }
+    } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) {
+      // Same as above. Mark this Allocate node so that when it is
+      // expanded no MemBarStoreStore is added.
+      n->as_Allocate()->initialization()->set_does_not_escape();
     }
   }
 
@@ -1827,20 +1842,15 @@
       Node *n = C->macro_node(i);
       if (n->is_AbstractLock()) { // Lock and Unlock nodes
         AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_eliminated() || alock->is_coarsened()) {
+        if (!alock->is_non_esc_obj()) {
           PointsToNode::EscapeState es = escape_state(alock->obj_node());
           assert(es != PointsToNode::UnknownEscape, "should know");
           if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            if (!alock->is_eliminated()) {
-              // Mark it eliminated to update any counters
-              alock->set_eliminated();
-            } else {
-              // The lock could be marked eliminated by lock coarsening
-              // code during first IGVN before EA. Clear coarsened flag
-              // to eliminate all associated locks/unlocks and relock
-              // during deoptimization.
-              alock->clear_coarsened();
-            }
+            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
+            // The lock could be marked eliminated by lock coarsening
+            // code during first IGVN before EA. Replace coarsened flag
+            // to eliminate all associated locks/unlocks.
+            alock->set_non_esc_obj();
           }
         }
       }
@@ -1874,6 +1884,25 @@
       igvn->hash_delete(_pcmp_eq);
   }
 
+  // For MemBarStoreStore nodes added in library_call.cpp, check
+  // escape status of associated AllocateNode and optimize out
+  // MemBarStoreStore node if the allocated object never escapes.
+  while (storestore_worklist.length() != 0) {
+    Node *n = storestore_worklist.pop();
+    MemBarStoreStoreNode *storestore = n ->as_MemBarStoreStore();
+    Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
+    assert (alloc->is_Allocate(), "storestore should point to AllocateNode");
+    PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state();
+    if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) {
+      MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
+      mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
+      mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
+
+      _igvn->register_new_node_with_optimizer(mb);
+      _igvn->replace_node(storestore, mb);
+    }
+  }
+
 #ifndef PRODUCT
   if (PrintEscapeAnalysis) {
     dump(); // Dump ConnectionGraph
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -2304,9 +2304,9 @@
   // will always succeed.  We could leave a dependency behind to ensure this.
 
   // First load the super-klass's check-offset
-  Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() );
+  Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) );
   Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
-  int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes();
+  int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset());
   bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
 
   // Load from the sub-klass's super-class display list, or a 1-word cache of
@@ -2934,7 +2934,7 @@
     }
   }
   constant_value = Klass::_lh_neutral_value;  // put in a known value
-  Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+  Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset()));
   return make_load(NULL, lhp, TypeInt::INT, T_INT);
 }
 
@@ -3337,6 +3337,19 @@
   return NULL;
 }
 
+// Trace Allocate -> Proj[Parm] -> MemBarStoreStore
+MemBarStoreStoreNode* AllocateNode::storestore() {
+  ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
+  if (rawoop == NULL)  return NULL;
+  for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) {
+    Node* storestore = rawoop->fast_out(i);
+    if (storestore->is_MemBarStoreStore()) {
+      return storestore->as_MemBarStoreStore();
+    }
+  }
+  return NULL;
+}
+
 //----------------------------- loop predicates ---------------------------
 
 //------------------------------add_predicate_impl----------------------------
--- a/hotspot/src/share/vm/opto/library_call.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -819,7 +819,7 @@
   if (stopped())
     return NULL;                // already stopped
   bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
-  if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
+  if (zero_offset && subseq_length->eqv_uncast(array_length))
     return NULL;                // common case of whole-array copy
   Node* last = subseq_length;
   if (!zero_offset)             // last += offset
@@ -2165,8 +2165,7 @@
   IdealKit ideal(this);
 #define __ ideal.
 
-  const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() +
-                                        sizeof(oopDesc);
+  const int reference_type_offset = in_bytes(instanceKlass::reference_type_offset());
 
   Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
 
@@ -2806,8 +2805,10 @@
   // Note:  The argument might still be an illegal value like
   // Serializable.class or Object[].class.   The runtime will handle it.
   // But we must make an explicit check for initialization.
-  Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
-  Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT);
+  Node* insp = basic_plus_adr(kls, in_bytes(instanceKlass::init_state_offset()));
+  // Use T_BOOLEAN for instanceKlass::_init_state so the compiler
+  // can generate code to load it as unsigned byte.
+  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
   Node* bits = intcon(instanceKlass::fully_initialized);
   Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) );
   // The 'test' is non-zero if we need to take a slow path.
@@ -2954,7 +2955,7 @@
 //---------------------------load_mirror_from_klass----------------------------
 // Given a klass oop, load its java mirror (a java.lang.Class oop).
 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
-  Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
+  Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
   return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
 }
 
@@ -2994,7 +2995,7 @@
 Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
   // Branch around if the given klass has the given modifier bit set.
   // Like generate_guard, adds a new path onto the region.
-  Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+  Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
   Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
   Node* mask = intcon(modifier_mask);
   Node* bits = intcon(modifier_bits);
@@ -3115,7 +3116,7 @@
     break;
 
   case vmIntrinsics::_getModifiers:
-    p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset()));
     query_value = make_load(NULL, p, TypeInt::INT, T_INT);
     break;
 
@@ -3155,7 +3156,7 @@
       // A guard was added.  If the guard is taken, it was an array.
       phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
     // If we fall through, it's a plain class.  Get its _super.
-    p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
     kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
     null_ctl = top();
     kls = null_check_oop(kls, &null_ctl);
@@ -3173,7 +3174,7 @@
     if (generate_array_guard(kls, region) != NULL) {
       // Be sure to pin the oop load to the guard edge just created:
       Node* is_array_ctrl = region->in(region->req()-1);
-      Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
+      Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()));
       Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
       phi->add_req(cmo);
     }
@@ -3181,7 +3182,7 @@
     break;
 
   case vmIntrinsics::_getClassAccessFlags:
-    p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
     query_value = make_load(NULL, p, TypeInt::INT, T_INT);
     break;
 
@@ -4194,12 +4195,17 @@
   Node* raw_obj = alloc_obj->in(1);
   assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
 
+  AllocateNode* alloc = NULL;
   if (ReduceBulkZeroing) {
     // We will be completely responsible for initializing this object -
     // mark Initialize node as complete.
-    AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+    alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
     // The object was just allocated - there should be no any stores!
     guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), "");
+    // Mark as complete_with_arraycopy so that on AllocateNode
+    // expansion, we know this AllocateNode is initialized by an array
+    // copy and a StoreStore barrier exists after the array copy.
+    alloc->initialization()->set_complete_with_arraycopy();
   }
 
   // Copy the fastest available way.
@@ -4261,7 +4267,18 @@
   }
 
   // Do not let reads from the cloned object float above the arraycopy.
-  insert_mem_bar(Op_MemBarCPUOrder);
+  if (alloc != NULL) {
+    // Do not let stores that initialize this object be reordered with
+    // a subsequent store that would make this object accessible by
+    // other threads.
+    // Record what AllocateNode this StoreStore protects so that
+    // escape analysis can go from the MemBarStoreStoreNode to the
+    // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+    // based on the escape status of the AllocateNode.
+    insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+  } else {
+    insert_mem_bar(Op_MemBarCPUOrder);
+  }
 }
 
 //------------------------inline_native_clone----------------------------
@@ -4650,7 +4667,7 @@
   if (ReduceBulkZeroing
       && !ZeroTLAB              // pointless if already zeroed
       && basic_elem_type != T_CONFLICT // avoid corner case
-      && !_gvn.eqv_uncast(src, dest)
+      && !src->eqv_uncast(dest)
       && ((alloc = tightly_coupled_allocation(dest, slow_region))
           != NULL)
       && _gvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
@@ -4728,7 +4745,7 @@
     // copy_length is 0.
     if (!stopped() && dest_uninitialized) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
-      if (_gvn.eqv_uncast(copy_length, dest_length)
+      if (copy_length->eqv_uncast(dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
         // There is no zeroing to do. No need for a secondary raw memory barrier.
       } else {
@@ -4774,7 +4791,7 @@
     // with its attendant messy index arithmetic, and upgrade
     // the copy to a more hardware-friendly word size of 64 bits.
     Node* tail_ctl = NULL;
-    if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
+    if (!stopped() && !dest_tail->eqv_uncast(dest_length)) {
       Node* cmp_lt   = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
       Node* bol_lt   = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
       tail_ctl = generate_slow_guard(bol_lt, NULL);
@@ -4857,7 +4874,7 @@
       PreserveJVMState pjvms(this);
       set_control(not_subtype_ctrl);
       // (At this point we can assume disjoint_bases, since types differ.)
-      int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
       Node* p1 = basic_plus_adr(dest_klass, ek_offset);
       Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM);
       Node* dest_elem_klass = _gvn.transform(n1);
@@ -5004,7 +5021,16 @@
   // the membar also.
   //
   // Do not let reads from the cloned object float above the arraycopy.
-  if (InsertMemBarAfterArraycopy || alloc != NULL)
+  if (alloc != NULL) {
+    // Do not let stores that initialize this object be reordered with
+    // a subsequent store that would make this object accessible by
+    // other threads.
+    // Record what AllocateNode this StoreStore protects so that
+    // escape analysis can go from the MemBarStoreStoreNode to the
+    // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+    // based on the escape status of the AllocateNode.
+    insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+  } else if (InsertMemBarAfterArraycopy)
     insert_mem_bar(Op_MemBarCPUOrder);
 }
 
@@ -5308,7 +5334,7 @@
   // for the target array.  This is an optimistic check.  It will
   // look in each non-null element's class, at the desired klass's
   // super_check_offset, for the desired klass.
-  int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
   Node* n3 = new(C, 3) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr());
   Node* check_offset = ConvI2X(_gvn.transform(n3));
--- a/hotspot/src/share/vm/opto/locknode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/locknode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -49,18 +49,22 @@
 
 //-----------------------------hash--------------------------------------------
 uint BoxLockNode::hash() const {
+  if (EliminateNestedLocks)
+    return NO_HASH; // Each locked region has own BoxLock node
   return Node::hash() + _slot + (_is_eliminated ? Compile::current()->fixed_slots() : 0);
 }
 
 //------------------------------cmp--------------------------------------------
 uint BoxLockNode::cmp( const Node &n ) const {
+  if (EliminateNestedLocks)
+    return (&n == this); // Always fail except on self
   const BoxLockNode &bn = (const BoxLockNode &)n;
   return bn._slot == _slot && bn._is_eliminated == _is_eliminated;
 }
 
-OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
-  // Chase down the BoxNode
-  while (!box_node->is_BoxLock()) {
+BoxLockNode* BoxLockNode::box_node(Node* box) {
+  // Chase down the BoxNode after RA which may spill box nodes.
+  while (!box->is_BoxLock()) {
     //    if (box_node->is_SpillCopy()) {
     //      Node *m = box_node->in(1);
     //      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
@@ -68,10 +72,64 @@
     //        continue;
     //      }
     //    }
-    assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
-    box_node = box_node->in(1);
+    assert(box->is_SpillCopy() || box->is_Phi(), "Bad spill of Lock.");
+    // Only BoxLock nodes with the same stack slot are merged.
+    // So it is enough to trace one path to find the slot value.
+    box = box->in(1);
   }
-  return box_node->in_RegMask(0).find_first_elem();
+  return box->as_BoxLock();
+}
+
+OptoReg::Name BoxLockNode::reg(Node* box) {
+  return box_node(box)->in_RegMask(0).find_first_elem();
+}
+
+// Is BoxLock node used for one simple lock region (same box and obj)?
+bool BoxLockNode::is_simple_lock_region(LockNode** unique_lock, Node* obj) {
+  LockNode* lock = NULL;
+  bool has_one_lock = false;
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    assert(!n->is_Phi(), "should not merge BoxLock nodes");
+    if (n->is_AbstractLock()) {
+      AbstractLockNode* alock = n->as_AbstractLock();
+      // Check lock's box since box could be referenced by Lock's debug info.
+      if (alock->box_node() == this) {
+        if (alock->obj_node()->eqv_uncast(obj)) {
+          if ((unique_lock != NULL) && alock->is_Lock()) {
+            if (lock == NULL) {
+              lock = alock->as_Lock();
+              has_one_lock = true;
+            } else if (lock != alock->as_Lock()) {
+              has_one_lock = false;
+            }
+          }
+        } else {
+          return false; // Different objects
+        }
+      }
+    }
+  }
+#ifdef ASSERT
+  // Verify that FastLock and Safepoint reference only this lock region.
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    if (n->is_FastLock()) {
+      FastLockNode* flock = n->as_FastLock();
+      assert((flock->box_node() == this) && flock->obj_node()->eqv_uncast(obj),"");
+    }
+    // Don't check monitor info in safepoints since the referenced object could
+    // be different from the locked object. It could be Phi node of different
+    // cast nodes which point to this locked object.
+    // We assume that no other objects could be referenced in monitor info
+    // associated with this BoxLock node because all associated locks and
+    // unlocks are reference only this one object.
+  }
+#endif
+  if (unique_lock != NULL && has_one_lock) {
+    *unique_lock = lock;
+  }
+  return true;
 }
 
 //=============================================================================
--- a/hotspot/src/share/vm/opto/locknode.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/locknode.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -49,11 +49,11 @@
 
 //------------------------------BoxLockNode------------------------------------
 class BoxLockNode : public Node {
+  const int     _slot; // stack slot
+  RegMask     _inmask; // OptoReg corresponding to stack slot
+  bool _is_eliminated; // Associated locks were safely eliminated
+
 public:
-  const int _slot;
-  RegMask   _inmask;
-  bool _is_eliminated;    // indicates this lock was safely eliminated
-
   BoxLockNode( int lock );
   virtual int Opcode() const;
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
@@ -66,11 +66,19 @@
   virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
   virtual uint ideal_reg() const { return Op_RegP; }
 
-  static OptoReg::Name stack_slot(Node* box_node);
+  static OptoReg::Name reg(Node* box_node);
+  static BoxLockNode* box_node(Node* box_node);
+  static bool same_slot(Node* box1, Node* box2) {
+    return box1->as_BoxLock()->_slot == box2->as_BoxLock()->_slot;
+  }
+  int stack_slot() const { return _slot; }
 
-  bool is_eliminated()  { return _is_eliminated; }
+  bool is_eliminated() const { return _is_eliminated; }
   // mark lock as eliminated.
-  void set_eliminated() { _is_eliminated = true; }
+  void set_eliminated()      { _is_eliminated = true; }
+
+  // Is BoxLock node used for one simple lock region?
+  bool is_simple_lock_region(LockNode** unique_lock, Node* obj);
 
 #ifndef PRODUCT
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
@@ -91,6 +99,7 @@
   }
   Node* obj_node() const { return in(1); }
   Node* box_node() const { return in(2); }
+  void  set_box_node(Node* box) { set_req(2, box); }
 
   // FastLock and FastUnlockNode do not hash, we need one for each correspoding
   // LockNode/UnLockNode to avoid creating Phi's.
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -3278,16 +3278,7 @@
 #ifdef ASSERT
     if (legal->is_Start() && !early->is_Root()) {
       // Bad graph. Print idom path and fail.
-      tty->print_cr( "Bad graph detected in build_loop_late");
-      tty->print("n: ");n->dump(); tty->cr();
-      tty->print("early: ");early->dump(); tty->cr();
-      int ct = 0;
-      Node *dbg_legal = LCA;
-      while(!dbg_legal->is_Start() && ct < 100) {
-        tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr();
-        ct++;
-        dbg_legal = idom(dbg_legal);
-      }
+      dump_bad_graph(n, early, LCA);
       assert(false, "Bad graph detected in build_loop_late");
     }
 #endif
@@ -3337,6 +3328,88 @@
     chosen_loop->_body.push(n);// Collect inner loops
 }
 
+#ifdef ASSERT
+void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) {
+  tty->print_cr( "Bad graph detected in build_loop_late");
+  tty->print("n: "); n->dump();
+  tty->print("early(n): "); early->dump();
+  if (n->in(0) != NULL  && !n->in(0)->is_top() &&
+      n->in(0) != early && !n->in(0)->is_Root()) {
+    tty->print("n->in(0): "); n->in(0)->dump();
+  }
+  for (uint i = 1; i < n->req(); i++) {
+    Node* in1 = n->in(i);
+    if (in1 != NULL && in1 != n && !in1->is_top()) {
+      tty->print("n->in(%d): ", i); in1->dump();
+      Node* in1_early = get_ctrl(in1);
+      tty->print("early(n->in(%d)): ", i); in1_early->dump();
+      if (in1->in(0) != NULL     && !in1->in(0)->is_top() &&
+          in1->in(0) != in1_early && !in1->in(0)->is_Root()) {
+        tty->print("n->in(%d)->in(0): ", i); in1->in(0)->dump();
+      }
+      for (uint j = 1; j < in1->req(); j++) {
+        Node* in2 = in1->in(j);
+        if (in2 != NULL && in2 != n && in2 != in1 && !in2->is_top()) {
+          tty->print("n->in(%d)->in(%d): ", i, j); in2->dump();
+          Node* in2_early = get_ctrl(in2);
+          tty->print("early(n->in(%d)->in(%d)): ", i, j); in2_early->dump();
+          if (in2->in(0) != NULL     && !in2->in(0)->is_top() &&
+              in2->in(0) != in2_early && !in2->in(0)->is_Root()) {
+            tty->print("n->in(%d)->in(%d)->in(0): ", i, j); in2->in(0)->dump();
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  tty->print("LCA(n): "); LCA->dump();
+  for (uint i = 0; i < n->outcnt(); i++) {
+    Node* u1 = n->raw_out(i);
+    if (u1 == n)
+      continue;
+    tty->print("n->out(%d): ", i); u1->dump();
+    if (u1->is_CFG()) {
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 != u1 && u2 != n && u2->is_CFG()) {
+          tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        }
+      }
+    } else {
+      Node* u1_later = get_ctrl(u1);
+      tty->print("later(n->out(%d)): ", i); u1_later->dump();
+      if (u1->in(0) != NULL     && !u1->in(0)->is_top() &&
+          u1->in(0) != u1_later && !u1->in(0)->is_Root()) {
+        tty->print("n->out(%d)->in(0): ", i); u1->in(0)->dump();
+      }
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 == n || u2 == u1)
+          continue;
+        tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        if (!u2->is_CFG()) {
+          Node* u2_later = get_ctrl(u2);
+          tty->print("later(n->out(%d)->out(%d)): ", i, j); u2_later->dump();
+          if (u2->in(0) != NULL     && !u2->in(0)->is_top() &&
+              u2->in(0) != u2_later && !u2->in(0)->is_Root()) {
+            tty->print("n->out(%d)->in(0): ", i); u2->in(0)->dump();
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  int ct = 0;
+  Node *dbg_legal = LCA;
+  while(!dbg_legal->is_Start() && ct < 100) {
+    tty->print("idom[%d] ",ct); dbg_legal->dump();
+    ct++;
+    dbg_legal = idom(dbg_legal);
+  }
+  tty->cr();
+}
+#endif
+
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
 void PhaseIdealLoop::dump( ) const {
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1040,6 +1040,10 @@
   bool created_loop_node()     { return _created_loop_node; }
   void register_new_node( Node *n, Node *blk );
 
+#ifdef ASSERT
+void dump_bad_graph(Node* n, Node* early, Node* LCA);
+#endif
+
 #ifndef PRODUCT
   void dump( ) const;
   void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -819,6 +819,8 @@
     if( iff->is_If() ) {        // Classic split-if?
       if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
+      // Can't split CMove with different control edge.
+      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
       if( get_ctrl(iff->in(2)) == n_ctrl ||
           get_ctrl(iff->in(3)) == n_ctrl )
         return;                 // Inputs not yet split-up
@@ -937,7 +939,7 @@
       }
       bool did_break = (i < imax);  // Did we break out of the previous loop?
       if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
-        Node *late_load_ctrl;
+        Node *late_load_ctrl = NULL;
         if (n->is_Load()) {
           // If n is a load, get and save the result from get_late_ctrl(),
           // to be later used in calculating the control for n's clones.
--- a/hotspot/src/share/vm/opto/macro.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/macro.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1088,6 +1088,12 @@
   Node* klass_node        = alloc->in(AllocateNode::KlassNode);
   Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
 
+  Node* storestore = alloc->storestore();
+  if (storestore != NULL) {
+    // Break this link that is no longer useful and confuses register allocation
+    storestore->set_req(MemBarNode::Precedent, top());
+  }
+
   assert(ctrl != NULL, "must have control");
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
@@ -1289,10 +1295,66 @@
                                    0, new_alloc_bytes, T_LONG);
     }
 
+    InitializeNode* init = alloc->initialization();
     fast_oop_rawmem = initialize_object(alloc,
                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
                                         klass_node, length, size_in_bytes);
 
+    // If initialization is performed by an array copy, any required
+    // MemBarStoreStore was already added. If the object does not
+    // escape no need for a MemBarStoreStore. Otherwise we need a
+    // MemBarStoreStore so that stores that initialize this object
+    // can't be reordered with a subsequent store that makes this
+    // object accessible by other threads.
+    if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) {
+      if (init == NULL || init->req() < InitializeNode::RawStores) {
+        // No InitializeNode or no stores captured by zeroing
+        // elimination. Simply add the MemBarStoreStore after object
+        // initialization.
+        MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot, fast_oop_rawmem);
+        transform_later(mb);
+
+        mb->init_req(TypeFunc::Memory, fast_oop_rawmem);
+        mb->init_req(TypeFunc::Control, fast_oop_ctrl);
+        fast_oop_ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+        transform_later(fast_oop_ctrl);
+        fast_oop_rawmem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+        transform_later(fast_oop_rawmem);
+      } else {
+        // Add the MemBarStoreStore after the InitializeNode so that
+        // all stores performing the initialization that were moved
+        // before the InitializeNode happen before the storestore
+        // barrier.
+
+        Node* init_ctrl = init->proj_out(TypeFunc::Control);
+        Node* init_mem = init->proj_out(TypeFunc::Memory);
+
+        MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot);
+        transform_later(mb);
+
+        Node* ctrl = new (C, 1) ProjNode(init,TypeFunc::Control);
+        transform_later(ctrl);
+        Node* mem = new (C, 1) ProjNode(init,TypeFunc::Memory);
+        transform_later(mem);
+
+        // The MemBarStoreStore depends on control and memory coming
+        // from the InitializeNode
+        mb->init_req(TypeFunc::Memory, mem);
+        mb->init_req(TypeFunc::Control, ctrl);
+
+        ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+        transform_later(ctrl);
+        mem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+        transform_later(mem);
+
+        // All nodes that depended on the InitializeNode for control
+        // and memory must now depend on the MemBarNode that itself
+        // depends on the InitializeNode
+        _igvn.replace_node(init_ctrl, ctrl);
+        _igvn.replace_node(init_mem, mem);
+      }
+    }
+
     if (C->env()->dtrace_extended_probes()) {
       // Slow-path call
       int size = TypeFunc::Parms + 2;
@@ -1326,6 +1388,7 @@
     result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
   } else {
     slow_region = ctrl;
+    result_phi_i_o = i_o; // Rename it to use in the following code.
   }
 
   // Generate slow-path call
@@ -1350,6 +1413,10 @@
   copy_call_debug_info((CallNode *) alloc,  call);
   if (!always_slow) {
     call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
+  } else {
+    // Hook i_o projection to avoid its elimination during allocation
+    // replacement (when only a slow call is generated).
+    call->set_req(TypeFunc::I_O, result_phi_i_o);
   }
   _igvn.replace_node(alloc, call);
   transform_later(call);
@@ -1366,8 +1433,10 @@
   //
   extract_call_projections(call);
 
-  // An allocate node has separate memory projections for the uses on the control and i_o paths
-  // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call)
+  // An allocate node has separate memory projections for the uses on
+  // the control and i_o paths. Replace the control memory projection with
+  // result_phi_rawmem (unless we are only generating a slow call when
+  // both memory projections are combined)
   if (!always_slow && _memproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _memproj_fallthrough->fast_out(i);
@@ -1378,8 +1447,8 @@
       --i;
     }
   }
-  // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so
-  // we end up with a call that has only 1 memory projection
+  // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete
+  // _memproj_catchall so we end up with a call that has only 1 memory projection.
   if (_memproj_catchall != NULL ) {
     if (_memproj_fallthrough == NULL) {
       _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory);
@@ -1393,17 +1462,18 @@
       // back up iterator
       --i;
     }
+    assert(_memproj_catchall->outcnt() == 0, "all uses must be deleted");
+    _igvn.remove_dead_node(_memproj_catchall);
   }
 
-  // An allocate node has separate i_o projections for the uses on the control and i_o paths
-  // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
-  if (_ioproj_fallthrough == NULL) {
-    _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
-    transform_later(_ioproj_fallthrough);
-  } else if (!always_slow) {
+  // An allocate node has separate i_o projections for the uses on the control
+  // and i_o paths. Always replace the control i_o projection with result i_o
+  // otherwise incoming i_o become dead when only a slow call is generated
+  // (it is different from memory projections where both projections are
+  // combined in such case).
+  if (_ioproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_fallthrough->fast_out(i);
-
       _igvn.hash_delete(use);
       imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
       _igvn._worklist.push(use);
@@ -1411,9 +1481,13 @@
       --i;
     }
   }
-  // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so
-  // we end up with a call that has only 1 control projection
+  // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete
+  // _ioproj_catchall so we end up with a call that has only 1 i_o projection.
   if (_ioproj_catchall != NULL ) {
+    if (_ioproj_fallthrough == NULL) {
+      _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
+      transform_later(_ioproj_fallthrough);
+    }
     for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_catchall->fast_out(i);
       _igvn.hash_delete(use);
@@ -1422,11 +1496,25 @@
       // back up iterator
       --i;
     }
+    assert(_ioproj_catchall->outcnt() == 0, "all uses must be deleted");
+    _igvn.remove_dead_node(_ioproj_catchall);
   }
 
   // if we generated only a slow call, we are done
-  if (always_slow)
+  if (always_slow) {
+    // Now we can unhook i_o.
+    if (result_phi_i_o->outcnt() > 1) {
+      call->set_req(TypeFunc::I_O, top());
+    } else {
+      assert(result_phi_i_o->unique_ctrl_out() == call, "");
+      // Case of new array with negative size known during compilation.
+      // AllocateArrayNode::Ideal() optimization disconnect unreachable
+      // following code since call to runtime will throw exception.
+      // As result there will be no users of i_o after the call.
+      // Leave i_o attached to this call to avoid problems in preceding graph.
+    }
     return;
+  }
 
 
   if (_fallthroughcatchproj != NULL) {
@@ -1470,7 +1558,7 @@
   Node* mark_node = NULL;
   // For now only enable fast locking for non-array types
   if (UseBiasedLocking && (length == NULL)) {
-    mark_node = make_load(control, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS);
+    mark_node = make_load(control, rawmem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeRawPtr::BOTTOM, T_ADDRESS);
   } else {
     mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype()));
   }
@@ -1701,7 +1789,8 @@
                          slow_call_address);
 }
 
-//-----------------------mark_eliminated_locking_nodes-----------------------
+//-------------------mark_eliminated_box----------------------------------
+//
 // During EA obj may point to several objects but after few ideal graph
 // transformations (CCP) it may point to only one non escaping object
 // (but still using phi), corresponding locks and unlocks will be marked
@@ -1712,62 +1801,148 @@
 // marked for elimination since new obj has no escape information.
 // Mark all associated (same box and obj) lock and unlock nodes for
 // elimination if some of them marked already.
-void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
-  if (!alock->is_eliminated()) {
+void PhaseMacroExpand::mark_eliminated_box(Node* oldbox, Node* obj) {
+  if (oldbox->as_BoxLock()->is_eliminated())
+    return; // This BoxLock node was processed already.
+
+  // New implementation (EliminateNestedLocks) has separate BoxLock
+  // node for each locked region so mark all associated locks/unlocks as
+  // eliminated even if different objects are referenced in one locked region
+  // (for example, OSR compilation of nested loop inside locked scope).
+  if (EliminateNestedLocks ||
+      oldbox->as_BoxLock()->is_simple_lock_region(NULL, obj)) {
+    // Box is used only in one lock region. Mark this box as eliminated.
+    _igvn.hash_delete(oldbox);
+    oldbox->as_BoxLock()->set_eliminated(); // This changes box's hash value
+    _igvn.hash_insert(oldbox);
+
+    for (uint i = 0; i < oldbox->outcnt(); i++) {
+      Node* u = oldbox->raw_out(i);
+      if (u->is_AbstractLock() && !u->as_AbstractLock()->is_non_esc_obj()) {
+        AbstractLockNode* alock = u->as_AbstractLock();
+        // Check lock's box since box could be referenced by Lock's debug info.
+        if (alock->box_node() == oldbox) {
+          // Mark eliminated all related locks and unlocks.
+          alock->set_non_esc_obj();
+        }
+      }
+    }
     return;
   }
-  if (!alock->is_coarsened()) { // Eliminated by EA
-      // Create new "eliminated" BoxLock node and use it
-      // in monitor debug info for the same object.
-      BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
-      Node* obj = alock->obj_node();
-      if (!oldbox->is_eliminated()) {
-        BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Create new "eliminated" BoxLock node and use it in monitor debug info
+  // instead of oldbox for the same object.
+  BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Note: BoxLock node is marked eliminated only here and it is used
+  // to indicate that all associated lock and unlock nodes are marked
+  // for elimination.
+  newbox->set_eliminated();
+  transform_later(newbox);
+
+  // Replace old box node with new box for all users of the same object.
+  for (uint i = 0; i < oldbox->outcnt();) {
+    bool next_edge = true;
+
+    Node* u = oldbox->raw_out(i);
+    if (u->is_AbstractLock()) {
+      AbstractLockNode* alock = u->as_AbstractLock();
+      if (alock->box_node() == oldbox && alock->obj_node()->eqv_uncast(obj)) {
+        // Replace Box and mark eliminated all related locks and unlocks.
+        alock->set_non_esc_obj();
+        _igvn.hash_delete(alock);
+        alock->set_box_node(newbox);
+        _igvn._worklist.push(alock);
+        next_edge = false;
+      }
+    }
+    if (u->is_FastLock() && u->as_FastLock()->obj_node()->eqv_uncast(obj)) {
+      FastLockNode* flock = u->as_FastLock();
+      assert(flock->box_node() == oldbox, "sanity");
+      _igvn.hash_delete(flock);
+      flock->set_box_node(newbox);
+      _igvn._worklist.push(flock);
+      next_edge = false;
+    }
+
+    // Replace old box in monitor debug info.
+    if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
+      SafePointNode* sfn = u->as_SafePoint();
+      JVMState* youngest_jvms = sfn->jvms();
+      int max_depth = youngest_jvms->depth();
+      for (int depth = 1; depth <= max_depth; depth++) {
+        JVMState* jvms = youngest_jvms->of_depth(depth);
+        int num_mon  = jvms->nof_monitors();
+        // Loop over monitors
+        for (int idx = 0; idx < num_mon; idx++) {
+          Node* obj_node = sfn->monitor_obj(jvms, idx);
+          Node* box_node = sfn->monitor_box(jvms, idx);
+          if (box_node == oldbox && obj_node->eqv_uncast(obj)) {
+            int j = jvms->monitor_box_offset(idx);
+            _igvn.hash_delete(u);
+            u->set_req(j, newbox);
+            _igvn._worklist.push(u);
+            next_edge = false;
+          }
+        }
+      }
+    }
+    if (next_edge) i++;
+  }
+}
+
+//-----------------------mark_eliminated_locking_nodes-----------------------
+void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
+  if (EliminateNestedLocks) {
+    if (alock->is_nested()) {
+       assert(alock->box_node()->as_BoxLock()->is_eliminated(), "sanity");
+       return;
+    } else if (!alock->is_non_esc_obj()) { // Not eliminated or coarsened
+      // Only Lock node has JVMState needed here.
+      if (alock->jvms() != NULL && alock->as_Lock()->is_nested_lock_region()) {
+        // Mark eliminated related nested locks and unlocks.
+        Node* obj = alock->obj_node();
+        BoxLockNode* box_node = alock->box_node()->as_BoxLock();
+        assert(!box_node->is_eliminated(), "should not be marked yet");
         // Note: BoxLock node is marked eliminated only here
         // and it is used to indicate that all associated lock
         // and unlock nodes are marked for elimination.
-        newbox->set_eliminated();
-        transform_later(newbox);
-        // Replace old box node with new box for all users
-        // of the same object.
-        for (uint i = 0; i < oldbox->outcnt();) {
-
-          bool next_edge = true;
-          Node* u = oldbox->raw_out(i);
-          if (u->is_AbstractLock() &&
-              u->as_AbstractLock()->obj_node() == obj &&
-              u->as_AbstractLock()->box_node() == oldbox) {
-            // Mark all associated locks and unlocks.
-            u->as_AbstractLock()->set_eliminated();
-            _igvn.hash_delete(u);
-            u->set_req(TypeFunc::Parms + 1, newbox);
-            next_edge = false;
+        box_node->set_eliminated(); // Box's hash is always NO_HASH here
+        for (uint i = 0; i < box_node->outcnt(); i++) {
+          Node* u = box_node->raw_out(i);
+          if (u->is_AbstractLock()) {
+            alock = u->as_AbstractLock();
+            if (alock->box_node() == box_node) {
+              // Verify that this Box is referenced only by related locks.
+              assert(alock->obj_node()->eqv_uncast(obj), "");
+              // Mark all related locks and unlocks.
+              alock->set_nested();
+            }
           }
-          // Replace old box in monitor debug info.
-          if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
-            SafePointNode* sfn = u->as_SafePoint();
-            JVMState* youngest_jvms = sfn->jvms();
-            int max_depth = youngest_jvms->depth();
-            for (int depth = 1; depth <= max_depth; depth++) {
-              JVMState* jvms = youngest_jvms->of_depth(depth);
-              int num_mon  = jvms->nof_monitors();
-              // Loop over monitors
-              for (int idx = 0; idx < num_mon; idx++) {
-                Node* obj_node = sfn->monitor_obj(jvms, idx);
-                Node* box_node = sfn->monitor_box(jvms, idx);
-                if (box_node == oldbox && obj_node == obj) {
-                  int j = jvms->monitor_box_offset(idx);
-                  _igvn.hash_delete(u);
-                  u->set_req(j, newbox);
-                  next_edge = false;
-                }
-              } // for (int idx = 0;
-            } // for (int depth = 1;
-          } // if (u->is_SafePoint()
-          if (next_edge) i++;
-        } // for (uint i = 0; i < oldbox->outcnt();)
-      } // if (!oldbox->is_eliminated())
-  } // if (!alock->is_coarsened())
+        }
+      }
+      return;
+    }
+    // Process locks for non escaping object
+    assert(alock->is_non_esc_obj(), "");
+  } // EliminateNestedLocks
+
+  if (alock->is_non_esc_obj()) { // Lock is used for non escaping object
+    // Look for all locks of this object and mark them and
+    // corresponding BoxLock nodes as eliminated.
+    Node* obj = alock->obj_node();
+    for (uint j = 0; j < obj->outcnt(); j++) {
+      Node* o = obj->raw_out(j);
+      if (o->is_AbstractLock() &&
+          o->as_AbstractLock()->obj_node()->eqv_uncast(obj)) {
+        alock = o->as_AbstractLock();
+        Node* box = alock->box_node();
+        // Replace old box node with new eliminated box for all users
+        // of the same object and mark related locks as eliminated.
+        mark_eliminated_box(box, obj);
+      }
+    }
+  }
 }
 
 // we have determined that this lock/unlock can be eliminated, we simply
@@ -1782,7 +1957,7 @@
     return false;
   }
 #ifdef ASSERT
-  if (alock->is_Lock() && !alock->is_coarsened()) {
+  if (!alock->is_coarsened()) {
     // Check that new "eliminated" BoxLock node is created.
     BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
     assert(oldbox->is_eliminated(), "should be done already");
@@ -1874,6 +2049,8 @@
   Node* box = lock->box_node();
   Node* flock = lock->fastlock_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // Make the merge point
   Node *region;
   Node *mem_phi;
@@ -1958,7 +2135,7 @@
 #endif
       klass_node->init_req(0, ctrl);
     }
-    Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
+    Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type());
 
     Node* thread = transform_later(new (C, 1) ThreadLocalNode());
     Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
@@ -2108,6 +2285,8 @@
   Node* obj = unlock->obj_node();
   Node* box = unlock->box_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // No need for a null check on unlock
 
   // Make the merge point
--- a/hotspot/src/share/vm/opto/macro.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/macro.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -92,6 +92,7 @@
   void process_users_of_allocation(AllocateNode *alloc);
 
   void eliminate_card_mark(Node *cm);
+  void mark_eliminated_box(Node* box, Node* obj);
   void mark_eliminated_locking_nodes(AbstractLockNode *alock);
   bool eliminate_locking_node(AbstractLockNode *alock);
   void expand_lock_node(LockNode *lock);
--- a/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1365,31 +1365,36 @@
 
   const Type *t = m->bottom_type();
 
-  if( t->singleton() ) {
+  if (t->singleton()) {
     // Never force constants into registers.  Allow them to match as
     // constants or registers.  Copies of the same value will share
     // the same register.  See find_shared_node.
     return false;
   } else {                      // Not a constant
     // Stop recursion if they have different Controls.
-    // Slot 0 of constants is not really a Control.
-    if( control && m->in(0) && control != m->in(0) ) {
+    Node* m_control = m->in(0);
+    // Control of load's memory can post-dominates load's control.
+    // So use it since load can't float above its memory.
+    Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
+    if (control && m_control && control != m_control && control != mem_control) {
 
       // Actually, we can live with the most conservative control we
       // find, if it post-dominates the others.  This allows us to
       // pick up load/op/store trees where the load can float a little
       // above the store.
       Node *x = control;
-      const uint max_scan = 6;   // Arbitrary scan cutoff
+      const uint max_scan = 6;  // Arbitrary scan cutoff
       uint j;
-      for( j=0; j<max_scan; j++ ) {
-        if( x->is_Region() )    // Bail out at merge points
+      for (j=0; j<max_scan; j++) {
+        if (x->is_Region())     // Bail out at merge points
           return true;
         x = x->in(0);
-        if( x == m->in(0) )     // Does 'control' post-dominate
+        if (x == m_control)     // Does 'control' post-dominate
           break;                // m->in(0)?  If so, we can use it
+        if (x == mem_control)   // Does 'control' post-dominate
+          break;                // mem_control?  If so, we can use it
       }
-      if( j == max_scan )       // No post-domination before scan end?
+      if (j == max_scan)        // No post-domination before scan end?
         return true;            // Then break the match tree up
     }
     if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
--- a/hotspot/src/share/vm/opto/memnode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1473,19 +1473,19 @@
 const Type*
 LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
                                  ciKlass* klass) const {
-  if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) {
     // The field is Klass::_modifier_flags.  Return its (constant) value.
     // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
     assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
     return TypeInt::make(klass->modifier_flags());
   }
-  if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::access_flags_offset())) {
     // The field is Klass::_access_flags.  Return its (constant) value.
     // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
     assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
     return TypeInt::make(klass->access_flags());
   }
-  if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) {
     // The field is Klass::_layout_helper.  Return its constant value if known.
     assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
     return TypeInt::make(klass->layout_helper());
@@ -1636,14 +1636,14 @@
       // We are loading a field from a Klass metaobject whose identity
       // is known at compile time (the type is "exact" or "precise").
       // Check for fields we know are maintained as constants by the VM.
-      if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
+      if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) {
         // The field is Klass::_super_check_offset.  Return its (constant) value.
         // (Folds up type checking code.)
         assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
         return TypeInt::make(klass->super_check_offset());
       }
       // Compute index into primary_supers array
-      juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+      juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
       // Check for overflowing; use unsigned compare to handle the negative case.
       if( depth < ciKlass::primary_super_limit() ) {
         // The field is an element of Klass::_primary_supers.  Return its (constant) value.
@@ -1654,14 +1654,14 @@
       }
       const Type* aift = load_array_final_field(tkls, klass);
       if (aift != NULL)  return aift;
-      if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
+      if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset())
           && klass->is_array_klass()) {
         // The field is arrayKlass::_component_mirror.  Return its (constant) value.
         // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
         assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
         return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
       }
-      if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
+      if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
         // The field is Klass::_java_mirror.  Return its (constant) value.
         // (Folds up the 2nd indirection in anObjConstant.getClass().)
         assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
@@ -1679,7 +1679,7 @@
       if( inner->is_instance_klass() &&
           !inner->as_instance_klass()->flags().is_interface() ) {
         // Compute index into primary_supers array
-        juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+        juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
         // Check for overflowing; use unsigned compare to handle the negative case.
         if( depth < ciKlass::primary_super_limit() &&
             depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
@@ -1695,7 +1695,7 @@
     // If the type is enough to determine that the thing is not an array,
     // we can give the layout_helper a positive interval type.
     // This will help short-circuit some reflective code.
-    if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
+    if (tkls->offset() == in_bytes(Klass::layout_helper_offset())
         && !klass->is_array_klass() // not directly typed as an array
         && !klass->is_interface()  // specifically not Serializable & Cloneable
         && !klass->is_java_lang_Object()   // not the supertype of all T[]
@@ -1938,7 +1938,7 @@
     if( !klass->is_loaded() )
       return _type;             // Bail out if not loaded
     if( klass->is_obj_array_klass() &&
-        (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
+        tkls->offset() == in_bytes(objArrayKlass::element_klass_offset())) {
       ciKlass* elem = klass->as_obj_array_klass()->element_klass();
       // // Always returning precise element type is incorrect,
       // // e.g., element type could be object and array may contain strings
@@ -1949,7 +1949,7 @@
       return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
     }
     if( klass->is_instance_klass() && tkls->klass_is_exact() &&
-        (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
+        tkls->offset() == in_bytes(Klass::super_offset())) {
       ciKlass* sup = klass->as_instance_klass()->super();
       // The field is Klass::_super.  Return its (constant) value.
       // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
@@ -2013,11 +2013,11 @@
               tkls->klass()->is_array_klass())
           && adr2->is_AddP()
           ) {
-        int mirror_field = Klass::java_mirror_offset_in_bytes();
+        int mirror_field = in_bytes(Klass::java_mirror_offset());
         if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
           mirror_field = in_bytes(arrayKlass::component_mirror_offset());
         }
-        if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
+        if (tkls->offset() == mirror_field) {
           return adr2->in(AddPNode::Base);
         }
       }
@@ -2201,7 +2201,7 @@
   // unsafe if I have intervening uses...  Also disallowed for StoreCM
   // since they must follow each StoreP operation.  Redundant StoreCMs
   // are eliminated just before matching in final_graph_reshape.
-  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address) &&
+  if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
       mem->Opcode() != Op_StoreCM) {
     // Looking at a dead closed cycle of memory?
     assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
@@ -2274,16 +2274,16 @@
 
   // Load then Store?  Then the Store is useless
   if (val->is_Load() &&
-      phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
+      val->in(MemNode::Address)->eqv_uncast(adr) &&
+      val->in(MemNode::Memory )->eqv_uncast(mem) &&
       val->as_Load()->store_Opcode() == Opcode()) {
     return mem;
   }
 
   // Two stores in a row of the same value?
   if (mem->is_Store() &&
-      phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
+      mem->in(MemNode::Address)->eqv_uncast(adr) &&
+      mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
       mem->Opcode() == Opcode()) {
     return mem;
   }
@@ -2721,6 +2721,7 @@
   case Op_MemBarVolatile:  return new(C, len) MemBarVolatileNode(C, atp, pn);
   case Op_MemBarCPUOrder:  return new(C, len) MemBarCPUOrderNode(C, atp, pn);
   case Op_Initialize:      return new(C, len) InitializeNode(C,     atp, pn);
+  case Op_MemBarStoreStore: return new(C, len) MemBarStoreStoreNode(C,  atp, pn);
   default:                 ShouldNotReachHere(); return NULL;
   }
 }
@@ -2870,7 +2871,7 @@
 
 //---------------------------InitializeNode------------------------------------
 InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
-  : _is_complete(Incomplete),
+  : _is_complete(Incomplete), _does_not_escape(false),
     MemBarNode(C, adr_type, rawoop)
 {
   init_class_id(Class_Initialize);
--- a/hotspot/src/share/vm/opto/memnode.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -918,6 +918,15 @@
   virtual int Opcode() const;
 };
 
+class MemBarStoreStoreNode: public MemBarNode {
+public:
+  MemBarStoreStoreNode(Compile* C, int alias_idx, Node* precedent)
+    : MemBarNode(C, alias_idx, precedent) {
+    init_class_id(Class_MemBarStoreStore);
+  }
+  virtual int Opcode() const;
+};
+
 // Ordering between a volatile store and a following volatile load.
 // Requires multi-CPU visibility?
 class MemBarVolatileNode: public MemBarNode {
@@ -950,6 +959,8 @@
   };
   int _is_complete;
 
+  bool _does_not_escape;
+
 public:
   enum {
     Control    = TypeFunc::Control,
@@ -989,6 +1000,9 @@
   void set_complete(PhaseGVN* phase);
   void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
 
+  bool does_not_escape() { return _does_not_escape; }
+  void set_does_not_escape() { _does_not_escape = true; }
+
 #ifdef ASSERT
   // ensure all non-degenerate stores are ordered and non-overlapping
   bool stores_are_sane(PhaseTransform* phase);
--- a/hotspot/src/share/vm/opto/node.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/node.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -833,8 +833,20 @@
 
 //---------------------------uncast_helper-------------------------------------
 Node* Node::uncast_helper(const Node* p) {
-  uint max_depth = 3;
-  for (uint i = 0; i < max_depth; i++) {
+#ifdef ASSERT
+  uint depth_count = 0;
+  const Node* orig_p = p;
+#endif
+
+  while (true) {
+#ifdef ASSERT
+    if (depth_count >= K) {
+      orig_p->dump(4);
+      if (p != orig_p)
+        p->dump(1);
+    }
+    assert(depth_count++ < K, "infinite loop in Node::uncast_helper");
+#endif
     if (p == NULL || p->req() != 2) {
       break;
     } else if (p->is_ConstraintCast()) {
--- a/hotspot/src/share/vm/opto/node.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/node.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -97,6 +97,7 @@
 class MachTempNode;
 class Matcher;
 class MemBarNode;
+class MemBarStoreStoreNode;
 class MemNode;
 class MergeMemNode;
 class MultiNode;
@@ -428,6 +429,10 @@
 
   // Strip away casting.  (It is depth-limited.)
   Node* uncast() const;
+  // Return whether two Nodes are equivalent, after stripping casting.
+  bool eqv_uncast(const Node* n) const {
+    return (this->uncast() == n->uncast());
+  }
 
 private:
   static Node* uncast_helper(const Node* n);
@@ -564,7 +569,8 @@
         DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
       DEFINE_CLASS_ID(Start,       Multi, 2)
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
-        DEFINE_CLASS_ID(Initialize,    MemBar, 0)
+        DEFINE_CLASS_ID(Initialize,       MemBar, 0)
+        DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
 
     DEFINE_CLASS_ID(Mach,  Node, 1)
       DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -744,6 +750,7 @@
   DEFINE_CLASS_QUERY(MachTemp)
   DEFINE_CLASS_QUERY(Mem)
   DEFINE_CLASS_QUERY(MemBar)
+  DEFINE_CLASS_QUERY(MemBarStoreStore)
   DEFINE_CLASS_QUERY(MergeMem)
   DEFINE_CLASS_QUERY(Multi)
   DEFINE_CLASS_QUERY(MultiBranch)
--- a/hotspot/src/share/vm/opto/output.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/output.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -924,10 +924,10 @@
         scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
       }
 
-      OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
       Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
-      while( !box_node->is_BoxLock() )  box_node = box_node->in(1);
-      monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated()));
+      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
+      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
     }
 
     // We dump the object pool first, since deoptimization reads it in first.
--- a/hotspot/src/share/vm/opto/parse1.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1604,7 +1604,16 @@
           continue;
         default:                // All normal stuff
           if (phi == NULL) {
-            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+            const JVMState* jvms = map()->jvms();
+            if (EliminateNestedLocks &&
+                jvms->is_mon(j) && jvms->is_monitor_box(j)) {
+              // BoxLock nodes are not commoning.
+              // Use old BoxLock node as merged box.
+              assert(newin->jvms()->is_monitor_box(j), "sanity");
+              // This assert also tests that nodes are BoxLock.
+              assert(BoxLockNode::same_slot(n, m), "sanity");
+              C->gvn_replace_by(n, m);
+            } else if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
               phi = ensure_phi(j, nophi);
             }
           }
@@ -1911,7 +1920,7 @@
   Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
   Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
 
-  Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+  Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset()));
   Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
 
   Node* mask  = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
--- a/hotspot/src/share/vm/opto/parseHelper.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/parseHelper.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -200,7 +200,7 @@
   // Come here for polymorphic array klasses
 
   // Extract the array element class
-  int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+  int element_klass_offset = in_bytes(objArrayKlass::element_klass_offset());
   Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
   Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) );
 
@@ -220,7 +220,7 @@
   _gvn.set_type(merge, Type::CONTROL);
   Node* kls = makecon(TypeKlassPtr::make(klass));
 
-  Node* init_thread_offset = _gvn.MakeConX(instanceKlass::init_thread_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+  Node* init_thread_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_thread_offset()));
   Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset);
   Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS);
   Node *tst   = Bool( CmpP( init_thread, cur_thread), BoolTest::eq);
@@ -228,9 +228,11 @@
   set_control(IfTrue(iff));
   merge->set_req(1, IfFalse(iff));
 
-  Node* init_state_offset = _gvn.MakeConX(instanceKlass::init_state_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+  Node* init_state_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_state_offset()));
   adr_node = basic_plus_adr(kls, kls, init_state_offset);
-  Node* init_state = make_load(NULL, adr_node, TypeInt::INT, T_INT);
+  // Use T_BOOLEAN for instanceKlass::_init_state so the compiler
+  // can generate code to load it as unsigned byte.
+  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN);
   Node* being_init = _gvn.intcon(instanceKlass::being_initialized);
   tst   = Bool( CmpI( init_state, being_init), BoolTest::eq);
   iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
--- a/hotspot/src/share/vm/opto/phaseX.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/phaseX.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -256,11 +256,6 @@
   // For pessimistic optimizations this is simply pointer equivalence.
   bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
 
-  // Return whether two Nodes are equivalent, after stripping casting.
-  bool eqv_uncast(const Node* n1, const Node* n2) const {
-    return eqv(n1->uncast(), n2->uncast());
-  }
-
   // For pessimistic passes, the return type must monotonically narrow.
   // For optimistic  passes, the return type must monotonically widen.
   // It is possible to get into a "death march" in either type of pass,
--- a/hotspot/src/share/vm/opto/postaloc.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/postaloc.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -89,32 +89,62 @@
   return blk_adjust;
 }
 
+#ifdef ASSERT
+static bool expected_yanked_node(Node *old, Node *orig_old) {
+  // This code is expected only next original nodes:
+  // - load from constant table node which may have next data input nodes:
+  //     MachConstantBase, Phi, MachTemp, MachSpillCopy
+  // - load constant node which may have next data input nodes:
+  //     MachTemp, MachSpillCopy
+  // - MachSpillCopy
+  // - MachProj and Copy dead nodes
+  if (old->is_MachSpillCopy()) {
+    return true;
+  } else if (old->is_Con()) {
+    return true;
+  } else if (old->is_MachProj()) { // Dead kills projection of Con node
+    return (old == orig_old);
+  } else if (old->is_Copy()) {     // Dead copy of a callee-save value
+    return (old == orig_old);
+  } else if (old->is_MachTemp()) {
+    return orig_old->is_Con();
+  } else if (old->is_Phi() || old->is_MachConstantBase()) {
+    return (orig_old->is_Con() && orig_old->is_MachConstant());
+  }
+  return false;
+}
+#endif
+
 //------------------------------yank_if_dead-----------------------------------
-// Removed an edge from 'old'.  Yank if dead.  Return adjustment counts to
+// Removed edges from 'old'.  Yank if dead.  Return adjustment counts to
 // iterators in the current block.
-int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                                       Node_List *value, Node_List *regnd) {
   int blk_adjust=0;
-  while (old->outcnt() == 0 && old != C->top()) {
+  if (old->outcnt() == 0 && old != C->top()) {
+#ifdef ASSERT
+    if (!expected_yanked_node(old, orig_old)) {
+      tty->print_cr("==============================================");
+      tty->print_cr("orig_old:");
+      orig_old->dump();
+      tty->print_cr("old:");
+      old->dump();
+      assert(false, "unexpected yanked node");
+    }
+    if (old->is_Con())
+      orig_old = old; // Reset to satisfy expected nodes checks.
+#endif
     blk_adjust += yank(old, current_block, value, regnd);
 
-    Node *tmp = NULL;
     for (uint i = 1; i < old->req(); i++) {
-      if (old->in(i)->is_MachTemp()) {
-        // handle TEMP inputs
-        Node* machtmp = old->in(i);
-        if (machtmp->outcnt() == 1) {
-          assert(machtmp->unique_out() == old, "sanity");
-          blk_adjust += yank(machtmp, current_block, value, regnd);
-          machtmp->disconnect_inputs(NULL);
-        }
-      } else {
-        assert(tmp == NULL, "can't handle more non MachTemp inputs");
-        tmp = old->in(i);
+      Node* n = old->in(i);
+      if (n != NULL) {
+        old->set_req(i, NULL);
+        blk_adjust += yank_if_dead_recurse(n, orig_old, current_block, value, regnd);
       }
     }
+    // Disconnect control and remove precedence edges if any exist
     old->disconnect_inputs(NULL);
-    if( !tmp ) break;
-    old = tmp;
   }
   return blk_adjust;
 }
--- a/hotspot/src/share/vm/opto/subnode.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/opto/subnode.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -91,7 +91,7 @@
 
   // Not correct for SubFnode and AddFNode (must check for infinity)
   // Equal?  Subtract is zero
-  if (phase->eqv_uncast(in1, in2))  return add_id();
+  if (in1->eqv_uncast(in2))  return add_id();
 
   // Either input is BOTTOM ==> the result is the local BOTTOM
   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
--- a/hotspot/src/share/vm/prims/jvmtiEnv.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiEnv.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -267,7 +267,10 @@
 
     instanceKlassHandle ikh(current_thread, k_oop);
     if (ikh->get_cached_class_file_bytes() == NULL) {
-      // not cached, we need to reconstitute the class file from VM representation
+      // Not cached, we need to reconstitute the class file from the
+      // VM representation. We don't attach the reconstituted class
+      // bytes to the instanceKlass here because they have not been
+      // validated and we're not at a safepoint.
       constantPoolHandle  constants(current_thread, ikh->constants());
       ObjectLocker ol(constants, current_thread);    // lock constant pool while we query it
 
--- a/hotspot/src/share/vm/prims/jvmtiExport.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiExport.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -538,8 +538,6 @@
     _curr_env = NULL;
     _cached_length_ptr = cached_length_ptr;
     _cached_data_ptr = cached_data_ptr;
-    *_cached_length_ptr = 0;
-    *_cached_data_ptr = NULL;
 
     _state = _thread->jvmti_thread_state();
     if (_state != NULL) {
--- a/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -854,8 +854,9 @@
 
     // RC_TRACE_WITH_THREAD macro has an embedded ResourceMark
     RC_TRACE_WITH_THREAD(0x00000001, THREAD,
-      ("loading name=%s (avail_mem=" UINT64_FORMAT "K)",
-      the_class->external_name(), os::available_memory() >> 10));
+      ("loading name=%s kind=%d (avail_mem=" UINT64_FORMAT "K)",
+      the_class->external_name(), _class_load_kind,
+      os::available_memory() >> 10));
 
     ClassFileStream st((u1*) _class_defs[i].class_bytes,
       _class_defs[i].class_byte_count, (char *)"__VM_RedefineClasses__");
@@ -3205,8 +3206,20 @@
   // with them was cached on the scratch class, move to the_class.
   // Note: we still want to do this if nothing needed caching since it
   // should get cleared in the_class too.
-  the_class->set_cached_class_file(scratch_class->get_cached_class_file_bytes(),
-                                   scratch_class->get_cached_class_file_len());
+  if (the_class->get_cached_class_file_bytes() == 0) {
+    // the_class doesn't have a cache yet so copy it
+    the_class->set_cached_class_file(
+      scratch_class->get_cached_class_file_bytes(),
+      scratch_class->get_cached_class_file_len());
+  }
+#ifndef PRODUCT
+  else {
+    assert(the_class->get_cached_class_file_bytes() ==
+      scratch_class->get_cached_class_file_bytes(), "cache ptrs must match");
+    assert(the_class->get_cached_class_file_len() ==
+      scratch_class->get_cached_class_file_len(), "cache lens must match");
+  }
+#endif
 
   // Replace inner_classes
   typeArrayOop old_inner_classes = the_class->inner_classes();
--- a/hotspot/src/share/vm/runtime/advancedThresholdPolicy.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/advancedThresholdPolicy.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -156,20 +156,19 @@
 // Called with the queue locked and with at least one element
 CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
   CompileTask *max_task = NULL;
-  methodOop max_method;
+  methodHandle max_method;
   jlong t = os::javaTimeMillis();
   // Iterate through the queue and find a method with a maximum rate.
   for (CompileTask* task = compile_queue->first(); task != NULL;) {
     CompileTask* next_task = task->next();
-    methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
-    methodDataOop mdo = method->method_data();
-    update_rate(t, method);
+    methodHandle method = (methodOop)JNIHandles::resolve(task->method_handle());
+    update_rate(t, method());
     if (max_task == NULL) {
       max_task = task;
       max_method = method;
     } else {
       // If a method has been stale for some time, remove it from the queue.
-      if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+      if (is_stale(t, TieredCompileTaskTimeout, method()) && !is_old(method())) {
         if (PrintTieredEvents) {
           print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
         }
@@ -181,7 +180,7 @@
       }
 
       // Select a method with a higher rate
-      if (compare_methods(method, max_method)) {
+      if (compare_methods(method(), max_method())) {
         max_task = task;
         max_method = method;
       }
@@ -190,7 +189,7 @@
   }
 
   if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
-      && is_method_profiled(max_method)) {
+      && is_method_profiled(max_method())) {
     max_task->set_comp_level(CompLevel_limited_profile);
     if (PrintTieredEvents) {
       print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1000,6 +1000,13 @@
     UseInterpreter           = false;
     BackgroundCompilation    = false;
     ClipInlining             = false;
+    // Be much more aggressive in tiered mode with -Xcomp and exercise C2 more.
+    // We will first compile a level 3 version (C1 with full profiling), then do one invocation of it and
+    // compile a level 4 (C2) and then continue executing it.
+    if (TieredCompilation) {
+      Tier3InvokeNotifyFreqLog = 0;
+      Tier4InvocationThreshold = 0;
+    }
     break;
   }
 }
@@ -2323,7 +2330,7 @@
 #ifndef PRODUCT
     // -Xprintflags
     } else if (match_option(option, "-Xprintflags", &tail)) {
-      CommandLineFlags::printFlags();
+      CommandLineFlags::printFlags(tty, false);
       vm_exit(0);
 #endif
     // -D
@@ -2971,13 +2978,13 @@
       IgnoreUnrecognizedVMOptions = false;
     }
     if (match_option(option, "-XX:+PrintFlagsInitial", &tail)) {
-      CommandLineFlags::printFlags();
+      CommandLineFlags::printFlags(tty, false);
       vm_exit(0);
     }
 
 #ifndef PRODUCT
     if (match_option(option, "-XX:+PrintFlagsWithComments", &tail)) {
-      CommandLineFlags::printFlags(true);
+      CommandLineFlags::printFlags(tty, true);
       vm_exit(0);
     }
 #endif
@@ -3153,6 +3160,9 @@
   if (!UseBiasedLocking || EmitSync != 0) {
     UseOptoBiasInlining = false;
   }
+  if (!EliminateLocks) {
+    EliminateNestedLocks = false;
+  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
@@ -3170,7 +3180,7 @@
 #endif
 
   if (PrintCommandLineFlags) {
-    CommandLineFlags::printSetFlags();
+    CommandLineFlags::printSetFlags(tty);
   }
 
   // Apply CPU specific policy for the BiasedLocking
--- a/hotspot/src/share/vm/runtime/deoptimization.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/deoptimization.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -211,7 +211,7 @@
 #ifdef COMPILER2
   // Reallocate the non-escaping objects and restore their fields. Then
   // relock objects if synchronization on them was eliminated.
-  if (DoEscapeAnalysis) {
+  if (DoEscapeAnalysis || EliminateNestedLocks) {
     if (EliminateAllocations) {
       assert (chunk->at(0)->scope() != NULL,"expect only compiled java frames");
       GrowableArray<ScopeValue*>* objects = chunk->at(0)->scope()->objects();
--- a/hotspot/src/share/vm/runtime/globals.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -488,7 +488,7 @@
   }
 }
 
-void CommandLineFlags::printSetFlags() {
+void CommandLineFlags::printSetFlags(outputStream* out) {
   // Print which flags were set on the command line
   // note: this method is called before the thread structure is in place
   //       which means resource allocation cannot be used.
@@ -507,11 +507,11 @@
   // Print
   for (int i = 0; i < length; i++) {
     if (array[i]->origin /* naked field! */) {
-      array[i]->print_as_flag(tty);
-      tty->print(" ");
+      array[i]->print_as_flag(out);
+      out->print(" ");
     }
   }
-  tty->cr();
+  out->cr();
   FREE_C_HEAP_ARRAY(Flag*, array);
 }
 
@@ -524,7 +524,7 @@
 
 #endif // PRODUCT
 
-void CommandLineFlags::printFlags(bool withComments) {
+void CommandLineFlags::printFlags(outputStream* out, bool withComments) {
   // Print the flags sorted by name
   // note: this method is called before the thread structure is in place
   //       which means resource allocation cannot be used.
@@ -541,10 +541,10 @@
   qsort(array, length, sizeof(Flag*), compare_flags);
 
   // Print
-  tty->print_cr("[Global flags]");
+  out->print_cr("[Global flags]");
   for (int i = 0; i < length; i++) {
     if (array[i]->is_unlocked()) {
-      array[i]->print_on(tty, withComments);
+      array[i]->print_on(out, withComments);
     }
   }
   FREE_C_HEAP_ARRAY(Flag*, array);
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -326,9 +326,9 @@
 
   // Returns false if name is not a command line flag.
   static bool wasSetOnCmdline(const char* name, bool* value);
-  static void printSetFlags();
+  static void printSetFlags(outputStream* out);
 
-  static void printFlags(bool withComments = false );
+  static void printFlags(outputStream* out, bool withComments);
 
   static void verify() PRODUCT_RETURN;
 };
@@ -527,6 +527,9 @@
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
+  product(intx, UseAVX, 99,                                                 \
+          "Highest supported AVX instructions set on x86/x64")              \
+                                                                            \
   product(intx, UseVIS, 99,                                                 \
           "Highest supported VIS instructions set on Sparc")                \
                                                                             \
@@ -1553,7 +1556,7 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  diagnostic(intx, ParGCStridesPerThread, 2,                                \
+  diagnostic(uintx, ParGCStridesPerThread, 2,                               \
           "The number of strides per worker thread that we divide up the "  \
           "card table scanning work into")                                  \
                                                                             \
--- a/hotspot/src/share/vm/runtime/init.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/init.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,15 +47,16 @@
 void classLoader_init();
 void codeCache_init();
 void VM_Version_init();
+void os_init_globals();        // depends on VM_Version_init, before universe_init
 void stubRoutines_init1();
-jint universe_init();  // dependent on codeCache_init and stubRoutines_init
-void interpreter_init();  // before any methods loaded
-void invocationCounter_init();  // before any methods loaded
+jint universe_init();          // depends on codeCache_init and stubRoutines_init
+void interpreter_init();       // before any methods loaded
+void invocationCounter_init(); // before any methods loaded
 void marksweep_init();
 void accessFlags_init();
 void templateTable_init();
 void InterfaceSupport_init();
-void universe2_init();  // dependent on codeCache_init and stubRoutines_init
+void universe2_init();  // dependent on codeCache_init and stubRoutines_init, loads primordial classes
 void referenceProcessor_init();
 void jni_handles_init();
 void vmStructs_init();
@@ -94,8 +95,10 @@
   classLoader_init();
   codeCache_init();
   VM_Version_init();
+  os_init_globals();
   stubRoutines_init1();
-  jint status = universe_init();  // dependent on codeCache_init and stubRoutines_init
+  jint status = universe_init();  // dependent on codeCache_init and
+                                  // stubRoutines_init1
   if (status != JNI_OK)
     return status;
 
@@ -106,7 +109,7 @@
   templateTable_init();
   InterfaceSupport_init();
   SharedRuntime::generate_stubs();
-  universe2_init();  // dependent on codeCache_init and stubRoutines_init
+  universe2_init();  // dependent on codeCache_init and stubRoutines_init1
   referenceProcessor_init();
   jni_handles_init();
 #ifndef VM_STRUCTS_KERNEL
@@ -122,7 +125,7 @@
   if (!universe_post_init()) {
     return JNI_ERR;
   }
-  javaClasses_init();  // must happen after vtable initialization
+  javaClasses_init();   // must happen after vtable initialization
   stubRoutines_init2(); // note: StubRoutines need 2-phase init
 
   // Although we'd like to, we can't easily do a heap verify
@@ -137,7 +140,7 @@
   // All the flags that get adjusted by VM_Version_init and os::init_2
   // have been set so dump the flags now.
   if (PrintFlagsFinal) {
-    CommandLineFlags::printFlags();
+    CommandLineFlags::printFlags(tty, false);
   }
 
   return JNI_OK;
--- a/hotspot/src/share/vm/runtime/mutex.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/mutex.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -527,7 +527,21 @@
 
 void Monitor::IUnlock (bool RelaxAssert) {
   assert (ILocked(), "invariant") ;
-  _LockWord.Bytes[_LSBINDEX] = 0 ;       // drop outer lock
+  // Conceptually we need a MEMBAR #storestore|#loadstore barrier or fence immediately
+  // before the store that releases the lock.  Crucially, all the stores and loads in the
+  // critical section must be globally visible before the store of 0 into the lock-word
+  // that releases the lock becomes globally visible.  That is, memory accesses in the
+  // critical section should not be allowed to bypass or overtake the following ST that
+  // releases the lock.  As such, to prevent accesses within the critical section
+  // from "leaking" out, we need a release fence between the critical section and the
+  // store that releases the lock.  In practice that release barrier is elided on
+  // platforms with strong memory models such as TSO.
+  //
+  // Note that the OrderAccess::storeload() fence that appears after unlock store
+  // provides for progress conditions and succession and is _not related to exclusion
+  // safety or lock release consistency.
+  OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], 0); // drop outer lock
+
   OrderAccess::storeload ();
   ParkEvent * const w = _OnDeck ;
   assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ;
--- a/hotspot/src/share/vm/runtime/os.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/os.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -82,6 +82,12 @@
 julong os::free_bytes = 0;          // # of bytes freed
 #endif
 
+void os_init_globals() {
+  // Called from init_globals().
+  // See Threads::create_vm() in thread.cpp, and init.cpp.
+  os::init_globals();
+}
+
 // Fill in buffer with current local time as an ISO-8601 string.
 // E.g., yyyy-mm-ddThh:mm:ss-zzzz.
 // Returns buffer, or NULL if it failed.
--- a/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -99,9 +99,11 @@
   }
 
  public:
-
   static void init(void);                      // Called before command line parsing
   static jint init_2(void);                    // Called after command line parsing
+  static void init_globals(void) {             // Called from init_globals() in init.cpp
+    init_globals_ext();
+  }
   static void init_3(void);                    // Called at the end of vm init
 
   // File names are case-insensitive on windows only
@@ -256,7 +258,7 @@
                              char *addr, size_t bytes, bool read_only,
                              bool allow_exec);
   static bool   unmap_memory(char *addr, size_t bytes);
-  static void   free_memory(char *addr, size_t bytes);
+  static void   free_memory(char *addr, size_t bytes, size_t alignment_hint);
   static void   realign_memory(char *addr, size_t bytes, size_t alignment_hint);
 
   // NUMA-specific interface
@@ -500,6 +502,7 @@
 
   static void print_location(outputStream* st, intptr_t x, bool verbose = false);
   static size_t lasterror(char *buf, size_t len);
+  static int get_last_error();
 
   // Determines whether the calling process is being debugged by a user-mode debugger.
   static bool is_debugger_attached();
@@ -671,6 +674,11 @@
   // rest of line is skipped. Returns number of bytes read or -1 on EOF
   static int get_line_chars(int fd, char *buf, const size_t bsize);
 
+  // Extensions
+#include "runtime/os_ext.hpp"
+
+ public:
+
   // Platform dependent stuff
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.hpp"
@@ -715,6 +723,7 @@
 # include "os_bsd_zero.hpp"
 #endif
 
+ public:
   // debugging support (mostly used by debug.cpp but also fatal error handler)
   static bool find(address pc, outputStream* st = tty); // OS specific function to make sense out of an address
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/runtime/os_ext.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_OS_EXT_HPP
+#define SHARE_VM_RUNTIME_OS_EXT_HPP
+
+ public:
+  static void init_globals_ext() {} // Run from init_globals().
+                                    // See os.hpp/cpp and init.cpp.
+
+ private:
+
+#endif // SHARE_VM_RUNTIME_OS_EXT_HPP
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -295,7 +295,7 @@
   nonstatic_field(instanceKlass,               _nof_implementors,                             int)                                   \
   nonstatic_field(instanceKlass,               _implementors[0],                              klassOop)                              \
   nonstatic_field(instanceKlass,               _fields,                                       typeArrayOop)                          \
-  nonstatic_field(instanceKlass,               _java_fields_count,                             int)                                   \
+  nonstatic_field(instanceKlass,               _java_fields_count,                            u2)                                    \
   nonstatic_field(instanceKlass,               _constants,                                    constantPoolOop)                       \
   nonstatic_field(instanceKlass,               _class_loader,                                 oop)                                   \
   nonstatic_field(instanceKlass,               _protection_domain,                            oop)                                   \
@@ -305,12 +305,12 @@
   nonstatic_field(instanceKlass,               _inner_classes,                                typeArrayOop)                          \
   nonstatic_field(instanceKlass,               _nonstatic_field_size,                         int)                                   \
   nonstatic_field(instanceKlass,               _static_field_size,                            int)                                   \
-  nonstatic_field(instanceKlass,               _static_oop_field_count,                       int)                                   \
+  nonstatic_field(instanceKlass,               _static_oop_field_count,                       u2)                                   \
   nonstatic_field(instanceKlass,               _nonstatic_oop_map_size,                       int)                                   \
   nonstatic_field(instanceKlass,               _is_marked_dependent,                          bool)                                  \
   nonstatic_field(instanceKlass,               _minor_version,                                u2)                                    \
   nonstatic_field(instanceKlass,               _major_version,                                u2)                                    \
-  nonstatic_field(instanceKlass,               _init_state,                                   instanceKlass::ClassState)             \
+  nonstatic_field(instanceKlass,               _init_state,                                   u1)                                    \
   nonstatic_field(instanceKlass,               _init_thread,                                  Thread*)                               \
   nonstatic_field(instanceKlass,               _vtable_len,                                   int)                                   \
   nonstatic_field(instanceKlass,               _itable_len,                                   int)                                   \
@@ -1362,6 +1362,7 @@
   /* The compiler thinks this is a different type than */                 \
   /* unsigned short on Win32 */                                           \
   declare_unsigned_integer_type(u2)                                       \
+  declare_unsigned_integer_type(u1)                                       \
   declare_unsigned_integer_type(unsigned)                                 \
                                                                           \
   /*****************************/                                         \
--- a/hotspot/src/share/vm/services/attachListener.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/attachListener.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -99,6 +99,7 @@
 }
 
 // Implementation of "properties" command.
+// See also: PrintSystemPropertiesDCmd class
 static jint get_system_properties(AttachOperation* op, outputStream* out) {
   return get_properties(op, out, vmSymbols::serializePropertiesToByteArray_name());
 }
@@ -127,6 +128,7 @@
 }
 
 // Implementation of "threaddump" command - essentially a remote ctrl-break
+// See also: ThreadDumpDCmd class
 //
 static jint thread_dump(AttachOperation* op, outputStream* out) {
   bool print_concurrent_locks = false;
@@ -158,6 +160,7 @@
   DCmd::parse_and_execute(out, op->arg(0), ' ', THREAD);
   if (HAS_PENDING_EXCEPTION) {
     java_lang_Throwable::print(PENDING_EXCEPTION, out);
+    out->cr();
     CLEAR_PENDING_EXCEPTION;
     // The exception has been printed on the output stream
     // If the JVM returns JNI_ERR, the attachAPI throws a generic I/O
@@ -169,6 +172,7 @@
 
 #ifndef SERVICES_KERNEL   // Heap dumping not supported
 // Implementation of "dumpheap" command.
+// See also: HeapDumpDCmd class
 //
 // Input arguments :-
 //   arg0: Name of the dump file
@@ -211,6 +215,7 @@
 #endif // SERVICES_KERNEL
 
 // Implementation of "inspectheap" command
+// See also: ClassHistogramDCmd class
 //
 // Input arguments :-
 //   arg0: "-live" or "-all"
@@ -354,6 +359,7 @@
 }
 
 // Implementation of "printflag" command
+// See also: PrintVMFlagsDCmd class
 static jint print_flag(AttachOperation* op, outputStream* out) {
   const char* name = NULL;
   if ((name = op->arg(0)) == NULL) {
--- a/hotspot/src/share/vm/services/diagnosticCommand.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/diagnosticCommand.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -23,11 +23,15 @@
  */
 
 #include "precompiled.hpp"
+#include "gc_implementation/shared/vmGCOperations.hpp"
+#include "runtime/javaCalls.hpp"
 #include "services/diagnosticArgument.hpp"
 #include "services/diagnosticCommand.hpp"
 #include "services/diagnosticFramework.hpp"
+#include "services/heapDumper.hpp"
+#include "services/management.hpp"
 
-HelpDCmd::HelpDCmd(outputStream* output, bool heap) : DCmd(output, heap),
+HelpDCmd::HelpDCmd(outputStream* output, bool heap) : DCmdWithParser(output, heap),
   _all("-all", "Show help for all commands", "BOOLEAN", false, "false"),
   _cmd("command name", "The name of the command for which we want help",
         "STRING", false) {
@@ -35,14 +39,6 @@
   _dcmdparser.add_dcmd_argument(&_cmd);
 };
 
-void HelpDCmd::parse(CmdLine* line, char delim, TRAPS) {
-  _dcmdparser.parse(line, delim, CHECK);
-}
-
-void HelpDCmd::print_help(outputStream* out) {
-  _dcmdparser.print_help(out, name());
-}
-
 void HelpDCmd::execute(TRAPS) {
   if (_all.value()) {
     GrowableArray<const char*>* cmd_list = DCmdFactory::DCmd_list();
@@ -66,10 +62,11 @@
                          factory->is_enabled() ? "" : " [disabled]");
       output()->print_cr(factory->description());
       output()->print_cr("\nImpact: %s", factory->impact());
+      output()->cr();
       cmd = factory->create_resource_instance(output());
       if (cmd != NULL) {
         DCmdMark mark(cmd);
-        cmd->print_help(output());
+        cmd->print_help(factory->name());
       }
     } else {
       output()->print_cr("Help unavailable : '%s' : No such command", _cmd.value());
@@ -90,14 +87,6 @@
   }
 }
 
-void HelpDCmd::reset(TRAPS) {
-  _dcmdparser.reset(CHECK);
-}
-
-void HelpDCmd::cleanup() {
-  _dcmdparser.cleanup();
-}
-
 int HelpDCmd::num_arguments() {
   ResourceMark rm;
   HelpDCmd* dcmd = new HelpDCmd(NULL, false);
@@ -109,14 +98,6 @@
   }
 }
 
-GrowableArray<const char*>* HelpDCmd::argument_name_array() {
-  return _dcmdparser.argument_name_array();
-}
-
-GrowableArray<DCmdArgumentInfo*>* HelpDCmd::argument_info_array() {
-  return _dcmdparser.argument_info_array();
-}
-
 void VersionDCmd::execute(TRAPS) {
   output()->print_cr("%s version %s", Abstract_VM_Version::vm_name(),
           Abstract_VM_Version::vm_release());
@@ -129,3 +110,210 @@
             jdk_version.minor_version());
   }
 }
+
+PrintVMFlagsDCmd::PrintVMFlagsDCmd(outputStream* output, bool heap) :
+                                   DCmdWithParser(output, heap),
+  _all("-all", "Print all flags supported by the VM", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);
+}
+
+void PrintVMFlagsDCmd::execute(TRAPS) {
+  if (_all.value()) {
+    CommandLineFlags::printFlags(output(), true);
+  } else {
+    CommandLineFlags::printSetFlags(output());
+  }
+}
+
+int PrintVMFlagsDCmd::num_arguments() {
+    ResourceMark rm;
+    PrintVMFlagsDCmd* dcmd = new PrintVMFlagsDCmd(NULL, false);
+    if (dcmd != NULL) {
+      DCmdMark mark(dcmd);
+      return dcmd->_dcmdparser.num_arguments();
+    } else {
+      return 0;
+    }
+}
+
+void PrintSystemPropertiesDCmd::execute(TRAPS) {
+  // load sun.misc.VMSupport
+  Symbol* klass = vmSymbols::sun_misc_VMSupport();
+  klassOop k = SystemDictionary::resolve_or_fail(klass, true, CHECK);
+  instanceKlassHandle ik (THREAD, k);
+  if (ik->should_be_initialized()) {
+    ik->initialize(THREAD);
+  }
+  if (HAS_PENDING_EXCEPTION) {
+    java_lang_Throwable::print(PENDING_EXCEPTION, output());
+    output()->cr();
+    CLEAR_PENDING_EXCEPTION;
+    return;
+  }
+
+  // invoke the serializePropertiesToByteArray method
+  JavaValue result(T_OBJECT);
+  JavaCallArguments args;
+
+  Symbol* signature = vmSymbols::serializePropertiesToByteArray_signature();
+  JavaCalls::call_static(&result,
+                         ik,
+                         vmSymbols::serializePropertiesToByteArray_name(),
+                         signature,
+                         &args,
+                         THREAD);
+  if (HAS_PENDING_EXCEPTION) {
+    java_lang_Throwable::print(PENDING_EXCEPTION, output());
+    output()->cr();
+    CLEAR_PENDING_EXCEPTION;
+    return;
+  }
+
+  // The result should be a [B
+  oop res = (oop)result.get_jobject();
+  assert(res->is_typeArray(), "just checking");
+  assert(typeArrayKlass::cast(res->klass())->element_type() == T_BYTE, "just checking");
+
+  // copy the bytes to the output stream
+  typeArrayOop ba = typeArrayOop(res);
+  jbyte* addr = typeArrayOop(res)->byte_at_addr(0);
+  output()->print_raw((const char*)addr, ba->length());
+}
+
+VMUptimeDCmd::VMUptimeDCmd(outputStream* output, bool heap) :
+                           DCmdWithParser(output, heap),
+  _date("-date", "Add a prefix with current date", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_date);
+}
+
+void VMUptimeDCmd::execute(TRAPS) {
+  if (_date.value()) {
+    output()->date_stamp(true, "", ": ");
+  }
+  output()->time_stamp().update_to(tty->time_stamp().ticks());
+  output()->stamp();
+  output()->print_cr(" s");
+}
+
+int VMUptimeDCmd::num_arguments() {
+  ResourceMark rm;
+  VMUptimeDCmd* dcmd = new VMUptimeDCmd(NULL, false);
+  if (dcmd != NULL) {
+    DCmdMark mark(dcmd);
+    return dcmd->_dcmdparser.num_arguments();
+  } else {
+    return 0;
+  }
+}
+
+void SystemGCDCmd::execute(TRAPS) {
+  Universe::heap()->collect(GCCause::_java_lang_system_gc);
+}
+
+void RunFinalizationDCmd::execute(TRAPS) {
+  klassOop k = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_System(),
+                                                 true, CHECK);
+  instanceKlassHandle klass(THREAD, k);
+  JavaValue result(T_VOID);
+  JavaCalls::call_static(&result, klass,
+                         vmSymbols::run_finalization_name(),
+                         vmSymbols::void_method_signature(), CHECK);
+}
+
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+HeapDumpDCmd::HeapDumpDCmd(outputStream* output, bool heap) :
+                           DCmdWithParser(output, heap),
+  _filename("filename","Name of the dump file", "STRING",true),
+  _all("-all", "Dump all objects, including unreachable objects",
+       "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);
+  _dcmdparser.add_dcmd_argument(&_filename);
+}
+
+void HeapDumpDCmd::execute(TRAPS) {
+  // Request a full GC before heap dump if _all is false
+  // This helps reduces the amount of unreachable objects in the dump
+  // and makes it easier to browse.
+  HeapDumper dumper(!_all.value() /* request GC if _all is false*/);
+  int res = dumper.dump(_filename.value());
+  if (res == 0) {
+    output()->print_cr("Heap dump file created");
+  } else {
+    // heap dump failed
+    ResourceMark rm;
+    char* error = dumper.error_as_C_string();
+    if (error == NULL) {
+      output()->print_cr("Dump failed - reason unknown");
+    } else {
+      output()->print_cr("%s", error);
+    }
+  }
+}
+
+int HeapDumpDCmd::num_arguments() {
+  ResourceMark rm;
+  HeapDumpDCmd* dcmd = new HeapDumpDCmd(NULL, false);
+  if (dcmd != NULL) {
+    DCmdMark mark(dcmd);
+    return dcmd->_dcmdparser.num_arguments();
+  } else {
+    return 0;
+  }
+}
+#endif // SERVICES_KERNEL
+
+ClassHistogramDCmd::ClassHistogramDCmd(outputStream* output, bool heap) :
+                                       DCmdWithParser(output, heap),
+  _all("-all", "Inspect all objects, including unreachable objects",
+       "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);
+}
+
+void ClassHistogramDCmd::execute(TRAPS) {
+  VM_GC_HeapInspection heapop(output(),
+                              !_all.value() /* request full gc if false */,
+                              true /* need_prologue */);
+  VMThread::execute(&heapop);
+}
+
+int ClassHistogramDCmd::num_arguments() {
+  ResourceMark rm;
+  ClassHistogramDCmd* dcmd = new ClassHistogramDCmd(NULL, false);
+  if (dcmd != NULL) {
+    DCmdMark mark(dcmd);
+    return dcmd->_dcmdparser.num_arguments();
+  } else {
+    return 0;
+  }
+}
+
+ThreadDumpDCmd::ThreadDumpDCmd(outputStream* output, bool heap) :
+                               DCmdWithParser(output, heap),
+  _locks("-l", "print java.util.concurrent locks", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_locks);
+}
+
+void ThreadDumpDCmd::execute(TRAPS) {
+  // thread stacks
+  VM_PrintThreads op1(output(), _locks.value());
+  VMThread::execute(&op1);
+
+  // JNI global handles
+  VM_PrintJNI op2(output());
+  VMThread::execute(&op2);
+
+  // Deadlock detection
+  VM_FindDeadlocks op3(output());
+  VMThread::execute(&op3);
+}
+
+int ThreadDumpDCmd::num_arguments() {
+  ResourceMark rm;
+  ThreadDumpDCmd* dcmd = new ThreadDumpDCmd(NULL, false);
+  if (dcmd != NULL) {
+    DCmdMark mark(dcmd);
+    return dcmd->_dcmdparser.num_arguments();
+  } else {
+    return 0;
+  }
+}
--- a/hotspot/src/share/vm/services/diagnosticCommand.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/diagnosticCommand.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -35,9 +35,8 @@
 #include "services/diagnosticCommand.hpp"
 #include "services/diagnosticFramework.hpp"
 
-class HelpDCmd : public DCmd {
+class HelpDCmd : public DCmdWithParser {
 protected:
-  DCmdParser _dcmdparser;
   DCmdArgument<bool> _all;
   DCmdArgument<char*> _cmd;
 public:
@@ -50,13 +49,7 @@
   }
   static const char* impact() { return "Low: "; }
   static int num_arguments();
-  virtual void parse(CmdLine* line, char delim, TRAPS);
   virtual void execute(TRAPS);
-  virtual void reset(TRAPS);
-  virtual void cleanup();
-  virtual void print_help(outputStream* out);
-  virtual GrowableArray<const char*>* argument_name_array();
-  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array();
 };
 
 class VersionDCmd : public DCmd {
@@ -68,9 +61,156 @@
   }
   static const char* impact() { return "Low: "; }
   static int num_arguments() { return 0; }
-  virtual void parse(CmdLine* line, char delim, TRAPS) { }
+  virtual void execute(TRAPS);
+};
+
+class CommandLineDCmd : public DCmd {
+public:
+  CommandLineDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+  static const char* name() { return "VM.command_line"; }
+  static const char* description() {
+    return "Print the command line used to start this VM instance.";
+  }
+  static const char* impact() { return "Low: "; }
+  static int num_arguments() { return 0; }
+  virtual void execute(TRAPS) {
+    Arguments::print_on(_output);
+  }
+};
+
+// See also: get_system_properties in attachListener.cpp
+class PrintSystemPropertiesDCmd : public DCmd {
+public:
+  PrintSystemPropertiesDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "VM.system_properties"; }
+    static const char* description() {
+      return "Print system properties.";
+    }
+    static const char* impact() {
+      return "Low: ";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);
+};
+
+// See also: print_flag in attachListener.cpp
+class PrintVMFlagsDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _all;
+public:
+  PrintVMFlagsDCmd(outputStream* output, bool heap);
+  static const char* name() { return "VM.flags"; }
+  static const char* description() {
+    return "Print VM flag options and their current values.";
+  }
+  static const char* impact() {
+    return "Low: ";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
+};
+
+class VMUptimeDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _date;
+public:
+  VMUptimeDCmd(outputStream* output, bool heap);
+  static const char* name() { return "VM.uptime"; }
+  static const char* description() {
+    return "Print VM uptime.";
+  }
+  static const char* impact() {
+    return "Low: ";
+  }
+  static int num_arguments();
   virtual void execute(TRAPS);
-  virtual void print_help(outputStream* out) { }
+};
+
+class SystemGCDCmd : public DCmd {
+public:
+  SystemGCDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "GC.run"; }
+    static const char* description() {
+      return "Call java.lang.System.gc().";
+    }
+    static const char* impact() {
+      return "Medium: Depends on Java heap size and content.";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);
+};
+
+class RunFinalizationDCmd : public DCmd {
+public:
+  RunFinalizationDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "GC.run_finalization"; }
+    static const char* description() {
+      return "Call java.lang.System.runFinalization().";
+    }
+    static const char* impact() {
+      return "Medium: Depends on Java content.";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);
+};
+
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+// See also: dump_heap in attachListener.cpp
+class HeapDumpDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<char*> _filename;
+  DCmdArgument<bool>  _all;
+public:
+  HeapDumpDCmd(outputStream* output, bool heap);
+  static const char* name() {
+    return "GC.heap_dump";
+  }
+  static const char* description() {
+    return "Generate a HPROF format dump of the Java heap.";
+  }
+  static const char* impact() {
+    return "High: Depends on Java heap size and content. "
+           "Request a full GC unless the '-all' option is specified.";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
+};
+#endif // SERVICES_KERNEL
+
+// See also: inspeactheap in attachListener.cpp
+class ClassHistogramDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _all;
+public:
+  ClassHistogramDCmd(outputStream* output, bool heap);
+  static const char* name() {
+    return "GC.class_histogram";
+  }
+  static const char* description() {
+    return "Provide statistics about the Java heap usage.";
+  }
+  static const char* impact() {
+    return "High: Depends on Java heap size and content.";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
+};
+
+// See also: thread_dump in attachListener.cpp
+class ThreadDumpDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _locks;
+public:
+  ThreadDumpDCmd(outputStream* output, bool heap);
+  static const char* name() { return "Thread.print"; }
+  static const char* description() {
+    return "Print all threads with stacktraces.";
+  }
+  static const char* impact() {
+    return "Medium: Depends on the number of threads.";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
 };
 
 #endif // SHARE_VM_SERVICES_DIAGNOSTICCOMMAND_HPP
--- a/hotspot/src/share/vm/services/diagnosticFramework.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/diagnosticFramework.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -226,7 +226,7 @@
 }
 
 void DCmdParser::print_help(outputStream* out, const char* cmd_name) {
-  out->print("\nSyntax : %s %s", cmd_name, _options == NULL ? "" : "[options]");
+  out->print("Syntax : %s %s", cmd_name, _options == NULL ? "" : "[options]");
   GenDCmdArgument* arg = _arguments_list;
   while (arg != NULL) {
     if (arg->is_mandatory()) {
@@ -373,6 +373,30 @@
   }
 }
 
+void DCmdWithParser::parse(CmdLine* line, char delim, TRAPS) {
+  _dcmdparser.parse(line, delim, CHECK);
+}
+
+void DCmdWithParser::print_help(const char* name) {
+  _dcmdparser.print_help(output(), name);
+}
+
+void DCmdWithParser::reset(TRAPS) {
+  _dcmdparser.reset(CHECK);
+}
+
+void DCmdWithParser::cleanup() {
+  _dcmdparser.cleanup();
+}
+
+GrowableArray<const char*>* DCmdWithParser::argument_name_array() {
+  return _dcmdparser.argument_name_array();
+}
+
+GrowableArray<DCmdArgumentInfo*>* DCmdWithParser::argument_info_array() {
+  return _dcmdparser.argument_info_array();
+}
+
 Mutex* DCmdFactory::_dcmdFactory_lock = new Mutex(Mutex::leaf, "DCmdFactory", true);
 
 DCmdFactory* DCmdFactory::factory(const char* name, size_t len) {
--- a/hotspot/src/share/vm/services/diagnosticFramework.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/diagnosticFramework.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -241,8 +241,17 @@
   static int num_arguments() { return 0; }
   outputStream* output() { return _output; }
   bool is_heap_allocated()  { return _is_heap_allocated; }
-  virtual void print_help(outputStream* out) { };
-  virtual void parse(CmdLine* line, char delim, TRAPS) { }
+  virtual void print_help(const char* name) {
+    output()->print_cr("Syntax: %s", name);
+  }
+  virtual void parse(CmdLine* line, char delim, TRAPS) {
+    DCmdArgIter iter(line->args_addr(), line->args_len(), delim);
+    bool has_arg = iter.next(CHECK);
+    if (has_arg) {
+      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+                "Unknown argument in diagnostic command");
+    }
+  }
   virtual void execute(TRAPS) { }
   virtual void reset(TRAPS) { }
   virtual void cleanup() { }
@@ -262,6 +271,25 @@
                                 char delim, TRAPS);
 };
 
+class DCmdWithParser : public DCmd {
+protected:
+  DCmdParser _dcmdparser;
+public:
+  DCmdWithParser (outputStream *output, bool heap=false) : DCmd(output, heap) { }
+  static const char* name() { return "No Name";}
+  static const char* description() { return "No Help";}
+  static const char* disabled_message() { return "Diagnostic command currently disabled"; }
+  static const char* impact() { return "Low: No impact"; }
+  static int num_arguments() { return 0; }
+  virtual void parse(CmdLine *line, char delim, TRAPS);
+  virtual void execute(TRAPS) { }
+  virtual void reset(TRAPS);
+  virtual void cleanup();
+  virtual void print_help(const char* name);
+  virtual GrowableArray<const char*>* argument_name_array();
+  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array();
+};
+
 class DCmdMark : public StackObj {
   DCmd* _ref;
 public:
--- a/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -118,8 +118,22 @@
 #endif // SERVICES_KERNEL
   _optional_support.isThreadAllocatedMemorySupported = 1;
 
+  // Registration of the diagnostic commands
+  // First boolean argument specifies if the command is enabled
+  // Second boolean argument specifies if the command is hidden
   DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HelpDCmd>(true, false));
   DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<VersionDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<CommandLineDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<PrintSystemPropertiesDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<PrintVMFlagsDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<VMUptimeDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<SystemGCDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<RunFinalizationDCmd>(true, false));
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HeapDumpDCmd>(true, false));
+#endif // SERVICES_KERNEL
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ClassHistogramDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ThreadDumpDCmd>(true, false));
 }
 
 void Management::initialize(TRAPS) {
--- a/hotspot/src/share/vm/services/management.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/management.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -76,6 +76,9 @@
     _stamp.update();
   }
 
+  static jlong begin_vm_creation_time() {
+    return _begin_vm_creation_time->get_value();
+  }
   static jlong vm_init_done_time() {
     return _vm_init_done_time->get_value();
   }
--- a/hotspot/src/share/vm/services/threadService.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/services/threadService.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -377,7 +377,7 @@
     }
 
   }
-
+  delete cycle;
   return deadlocks;
 }
 
--- a/hotspot/src/share/vm/shark/sharkIntrinsics.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/shark/sharkIntrinsics.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -213,17 +213,11 @@
     SharkType::oop_type(),
     "klass");
 
-  Value *klass_part = builder()->CreateAddressOfStructEntry(
-    klass,
-    in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
-    SharkType::klass_type(),
-    "klass_part");
-
   state()->push(
     SharkValue::create_jobject(
       builder()->CreateValueOfStructEntry(
-        klass_part,
-        in_ByteSize(Klass::java_mirror_offset_in_bytes()),
+        klass,
+        Klass::java_mirror_offset(),
         SharkType::oop_type(),
         "java_mirror"),
       true));
--- a/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -745,15 +745,9 @@
     SharkType::oop_type(),
     "klass");
 
-  Value *klass_part = builder()->CreateAddressOfStructEntry(
+  Value *access_flags = builder()->CreateValueOfStructEntry(
     klass,
-    in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
-    SharkType::klass_type(),
-    "klass_part");
-
-  Value *access_flags = builder()->CreateValueOfStructEntry(
-    klass_part,
-    in_ByteSize(Klass::access_flags_offset_in_bytes()),
+    Klass::access_flags_offset(),
     SharkType::jint_type(),
     "access_flags");
 
--- a/hotspot/src/share/vm/utilities/workgroup.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -53,14 +53,14 @@
 }
 
 WorkGang::WorkGang(const char* name,
-                   int         workers,
+                   uint        workers,
                    bool        are_GC_task_threads,
                    bool        are_ConcurrentGC_threads) :
   AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) {
   _total_workers = workers;
 }
 
-GangWorker* WorkGang::allocate_worker(int which) {
+GangWorker* WorkGang::allocate_worker(uint which) {
   GangWorker* new_worker = new GangWorker(this, which);
   return new_worker;
 }
@@ -88,7 +88,7 @@
   } else {
     worker_type = os::pgc_thread;
   }
-  for (int worker = 0; worker < total_workers(); worker += 1) {
+  for (uint worker = 0; worker < total_workers(); worker += 1) {
     GangWorker* new_worker = allocate_worker(worker);
     assert(new_worker != NULL, "Failed to allocate GangWorker");
     _gang_workers[worker] = new_worker;
@@ -108,14 +108,14 @@
     tty->print_cr("Destructing work gang %s", name());
   }
   stop();   // stop all the workers
-  for (int worker = 0; worker < total_workers(); worker += 1) {
+  for (uint worker = 0; worker < total_workers(); worker += 1) {
     delete gang_worker(worker);
   }
   delete gang_workers();
   delete monitor();
 }
 
-GangWorker* AbstractWorkGang::gang_worker(int i) const {
+GangWorker* AbstractWorkGang::gang_worker(uint i) const {
   // Array index bounds checking.
   GangWorker* result = NULL;
   assert(gang_workers() != NULL, "No workers for indexing");
@@ -148,7 +148,7 @@
   // Tell the workers to get to work.
   monitor()->notify_all();
   // Wait for them to be finished
-  while (finished_workers() < (int) no_of_parallel_workers) {
+  while (finished_workers() < no_of_parallel_workers) {
     if (TraceWorkGang) {
       tty->print_cr("Waiting in work gang %s: %d/%d finished sequence %d",
                     name(), finished_workers(), no_of_parallel_workers,
@@ -377,12 +377,12 @@
     _n_workers(0), _n_completed(0), _should_reset(false) {
 }
 
-WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name)
+WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name)
   : _monitor(Mutex::safepoint, name, true),
     _n_workers(n_workers), _n_completed(0), _should_reset(false) {
 }
 
-void WorkGangBarrierSync::set_n_workers(int n_workers) {
+void WorkGangBarrierSync::set_n_workers(uint n_workers) {
   _n_workers   = n_workers;
   _n_completed = 0;
   _should_reset = false;
@@ -419,9 +419,9 @@
 
 // SubTasksDone functions.
 
-SubTasksDone::SubTasksDone(int n) :
+SubTasksDone::SubTasksDone(uint n) :
   _n_tasks(n), _n_threads(1), _tasks(NULL) {
-  _tasks = NEW_C_HEAP_ARRAY(jint, n);
+  _tasks = NEW_C_HEAP_ARRAY(uint, n);
   guarantee(_tasks != NULL, "alloc failure");
   clear();
 }
@@ -430,14 +430,14 @@
   return _tasks != NULL;
 }
 
-void SubTasksDone::set_n_threads(int t) {
+void SubTasksDone::set_n_threads(uint t) {
   assert(_claimed == 0 || _threads_completed == _n_threads,
          "should not be called while tasks are being processed!");
   _n_threads = (t == 0 ? 1 : t);
 }
 
 void SubTasksDone::clear() {
-  for (int i = 0; i < _n_tasks; i++) {
+  for (uint i = 0; i < _n_tasks; i++) {
     _tasks[i] = 0;
   }
   _threads_completed = 0;
@@ -446,9 +446,9 @@
 #endif
 }
 
-bool SubTasksDone::is_task_claimed(int t) {
+bool SubTasksDone::is_task_claimed(uint t) {
   assert(0 <= t && t < _n_tasks, "bad task id.");
-  jint old = _tasks[t];
+  uint old = _tasks[t];
   if (old == 0) {
     old = Atomic::cmpxchg(1, &_tasks[t], 0);
   }
@@ -457,7 +457,7 @@
 #ifdef ASSERT
   if (!res) {
     assert(_claimed < _n_tasks, "Too many tasks claimed; missing clear?");
-    Atomic::inc(&_claimed);
+    Atomic::inc((volatile jint*) &_claimed);
   }
 #endif
   return res;
@@ -471,7 +471,7 @@
     observed = Atomic::cmpxchg(old+1, &_threads_completed, old);
   } while (observed != old);
   // If this was the last thread checking in, clear the tasks.
-  if (observed+1 == _n_threads) clear();
+  if (observed+1 == (jint)_n_threads) clear();
 }
 
 
@@ -490,12 +490,12 @@
   return _n_threads > 0;
 }
 
-bool SequentialSubTasksDone::is_task_claimed(int& t) {
-  jint* n_claimed_ptr = &_n_claimed;
+bool SequentialSubTasksDone::is_task_claimed(uint& t) {
+  uint* n_claimed_ptr = &_n_claimed;
   t = *n_claimed_ptr;
   while (t < _n_tasks) {
     jint res = Atomic::cmpxchg(t+1, n_claimed_ptr, t);
-    if (res == t) {
+    if (res == (jint)t) {
       return false;
     }
     t = *n_claimed_ptr;
@@ -504,10 +504,10 @@
 }
 
 bool SequentialSubTasksDone::all_tasks_completed() {
-  jint* n_completed_ptr = &_n_completed;
-  jint  complete        = *n_completed_ptr;
+  uint* n_completed_ptr = &_n_completed;
+  uint  complete        = *n_completed_ptr;
   while (true) {
-    jint res = Atomic::cmpxchg(complete+1, n_completed_ptr, complete);
+    uint res = Atomic::cmpxchg(complete+1, n_completed_ptr, complete);
     if (res == complete) {
       break;
     }
--- a/hotspot/src/share/vm/utilities/workgroup.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -68,7 +68,7 @@
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   // This method configures the task for proper termination.
   // Some tasks do not have any requirements on termination
@@ -149,7 +149,7 @@
   // and notifies of changes in it.
   Monitor*  _monitor;
   // The count of the number of workers in the gang.
-  int _total_workers;
+  uint _total_workers;
   // Whether the workers should terminate.
   bool _terminate;
   // The array of worker threads for this gang.
@@ -160,18 +160,18 @@
   // A sequence number for the current task.
   int _sequence_number;
   // The number of started workers.
-  int _started_workers;
+  uint _started_workers;
   // The number of finished workers.
-  int _finished_workers;
+  uint _finished_workers;
 public:
   // Accessors for fields
   Monitor* monitor() const {
     return _monitor;
   }
-  int total_workers() const {
+  uint total_workers() const {
     return _total_workers;
   }
-  virtual int active_workers() const {
+  virtual uint active_workers() const {
     return _total_workers;
   }
   bool terminate() const {
@@ -186,10 +186,10 @@
   int sequence_number() const {
     return _sequence_number;
   }
-  int started_workers() const {
+  uint started_workers() const {
     return _started_workers;
   }
-  int finished_workers() const {
+  uint finished_workers() const {
     return _finished_workers;
   }
   bool are_GC_task_threads() const {
@@ -203,7 +203,7 @@
     return (task() == NULL);
   }
   // Return the Ith gang worker.
-  GangWorker* gang_worker(int i) const;
+  GangWorker* gang_worker(uint i) const;
 
   void threads_do(ThreadClosure* tc) const;
 
@@ -255,13 +255,13 @@
 class WorkGang: public AbstractWorkGang {
 public:
   // Constructor
-  WorkGang(const char* name, int workers,
+  WorkGang(const char* name, uint workers,
            bool are_GC_task_threads, bool are_ConcurrentGC_threads);
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task);
   void run_task(AbstractGangTask* task, uint no_of_parallel_workers);
   // Allocate a worker and return a pointer to it.
-  virtual GangWorker* allocate_worker(int which);
+  virtual GangWorker* allocate_worker(uint which);
   // Initialize workers in the gang.  Return true if initialization
   // succeeded. The type of the worker can be overridden in a derived
   // class with the appropriate implementation of allocate_worker().
@@ -323,25 +323,25 @@
   // determine completion.
 
  protected:
-  int _active_workers;
+  uint _active_workers;
  public:
   // Constructor and destructor.
   // Initialize active_workers to a minimum value.  Setting it to
   // the parameter "workers" will initialize it to a maximum
   // value which is not desirable.
-  FlexibleWorkGang(const char* name, int workers,
+  FlexibleWorkGang(const char* name, uint workers,
                    bool are_GC_task_threads,
                    bool  are_ConcurrentGC_threads) :
     WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
-    _active_workers(UseDynamicNumberOfGCThreads ? 1 : ParallelGCThreads) {};
+    _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {}
   // Accessors for fields
-  virtual int active_workers() const { return _active_workers; }
-  void set_active_workers(int v) {
+  virtual uint active_workers() const { return _active_workers; }
+  void set_active_workers(uint v) {
     assert(v <= _total_workers,
            "Trying to set more workers active than there are");
     _active_workers = MIN2(v, _total_workers);
     assert(v != 0, "Trying to set active workers to 0");
-    _active_workers = MAX2(1, _active_workers);
+    _active_workers = MAX2(1U, _active_workers);
     assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
            "Unless dynamic should use total workers");
   }
@@ -370,13 +370,13 @@
 class WorkGangBarrierSync : public StackObj {
 protected:
   Monitor _monitor;
-  int     _n_workers;
-  int     _n_completed;
+  uint     _n_workers;
+  uint     _n_completed;
   bool    _should_reset;
 
   Monitor* monitor()        { return &_monitor; }
-  int      n_workers()      { return _n_workers; }
-  int      n_completed()    { return _n_completed; }
+  uint     n_workers()      { return _n_workers; }
+  uint     n_completed()    { return _n_completed; }
   bool     should_reset()   { return _should_reset; }
 
   void     zero_completed() { _n_completed = 0; }
@@ -386,11 +386,11 @@
 
 public:
   WorkGangBarrierSync();
-  WorkGangBarrierSync(int n_workers, const char* name);
+  WorkGangBarrierSync(uint n_workers, const char* name);
 
   // Set the number of workers that will use the barrier.
   // Must be called before any of the workers start running.
-  void set_n_workers(int n_workers);
+  void set_n_workers(uint n_workers);
 
   // Enter the barrier. A worker that enters the barrier will
   // not be allowed to leave until all other threads have
@@ -402,18 +402,18 @@
 // subtasks will be identified by integer indices, usually elements of an
 // enumeration type.
 
-class SubTasksDone: public CHeapObj {
-  jint* _tasks;
-  int _n_tasks;
+class SubTasksDone : public CHeapObj {
+  uint* _tasks;
+  uint _n_tasks;
   // _n_threads is used to determine when a sub task is done.
   // It does not control how many threads will execute the subtask
   // but must be initialized to the number that do execute the task
   // in order to correctly decide when the subtask is done (all the
   // threads working on the task have finished).
-  int _n_threads;
-  jint _threads_completed;
+  uint _n_threads;
+  uint _threads_completed;
 #ifdef ASSERT
-  volatile jint _claimed;
+  volatile uint _claimed;
 #endif
 
   // Set all tasks to unclaimed.
@@ -423,19 +423,19 @@
   // Initializes "this" to a state in which there are "n" tasks to be
   // processed, none of the which are originally claimed.  The number of
   // threads doing the tasks is initialized 1.
-  SubTasksDone(int n);
+  SubTasksDone(uint n);
 
   // True iff the object is in a valid state.
   bool valid();
 
   // Get/set the number of parallel threads doing the tasks to "t".  Can only
   // be called before tasks start or after they are complete.
-  int n_threads() { return _n_threads; }
-  void set_n_threads(int t);
+  uint n_threads() { return _n_threads; }
+  void set_n_threads(uint t);
 
   // Returns "false" if the task "t" is unclaimed, and ensures that task is
   // claimed.  The task "t" is required to be within the range of "this".
-  bool is_task_claimed(int t);
+  bool is_task_claimed(uint t);
 
   // The calling thread asserts that it has attempted to claim all the
   // tasks that it will try to claim.  Every thread in the parallel task
@@ -456,12 +456,12 @@
 
 class SequentialSubTasksDone : public StackObj {
 protected:
-  jint _n_tasks;     // Total number of tasks available.
-  jint _n_claimed;   // Number of tasks claimed.
+  uint _n_tasks;     // Total number of tasks available.
+  uint _n_claimed;   // Number of tasks claimed.
   // _n_threads is used to determine when a sub task is done.
   // See comments on SubTasksDone::_n_threads
-  jint _n_threads;   // Total number of parallel threads.
-  jint _n_completed; // Number of completed threads.
+  uint _n_threads;   // Total number of parallel threads.
+  uint _n_completed; // Number of completed threads.
 
   void clear();
 
@@ -475,26 +475,26 @@
   bool valid();
 
   // number of tasks
-  jint n_tasks() const { return _n_tasks; }
+  uint n_tasks() const { return _n_tasks; }
 
   // Get/set the number of parallel threads doing the tasks to t.
   // Should be called before the task starts but it is safe
   // to call this once a task is running provided that all
   // threads agree on the number of threads.
-  int n_threads() { return _n_threads; }
-  void set_n_threads(int t) { _n_threads = t; }
+  uint n_threads() { return _n_threads; }
+  void set_n_threads(uint t) { _n_threads = t; }
 
   // Set the number of tasks to be claimed to t. As above,
   // should be called before the tasks start but it is safe
   // to call this once a task is running provided all threads
   // agree on the number of tasks.
-  void set_n_tasks(int t) { _n_tasks = t; }
+  void set_n_tasks(uint t) { _n_tasks = t; }
 
   // Returns false if the next task in the sequence is unclaimed,
   // and ensures that it is claimed. Will set t to be the index
   // of the claimed task in the sequence. Will return true if
   // the task cannot be claimed and there are none left to claim.
-  bool is_task_claimed(int& t);
+  bool is_task_claimed(uint& t);
 
   // The calling thread asserts that it has attempted to claim
   // all the tasks it possibly can in the sequence. Every thread
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp	Wed Jul 05 18:00:12 2017 +0200
@@ -33,11 +33,11 @@
 class WorkData;
 
 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
-  const char* name, int workers, bool are_GC_task_threads) :
+  const char* name, uint workers, bool are_GC_task_threads) :
   FlexibleWorkGang(name, workers, are_GC_task_threads, false),
     _yielded_workers(0) {}
 
-GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) {
+GangWorker* YieldingFlexibleWorkGang::allocate_worker(uint which) {
   YieldingFlexibleGangWorker* new_member =
       new YieldingFlexibleGangWorker(this, which);
   return (YieldingFlexibleGangWorker*) new_member;
@@ -120,7 +120,7 @@
   new_task->set_gang(this);  // Establish 2-way binding to support yielding
   _sequence_number++;
 
-  int requested_size = new_task->requested_size();
+  uint requested_size = new_task->requested_size();
   assert(requested_size >= 0, "Should be non-negative");
   if (requested_size != 0) {
     _active_workers = MIN2(requested_size, total_workers());
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Wed Jul 05 17:59:32 2017 +0200
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp	Wed Jul 05 18:00:12 2017 +0200
@@ -71,7 +71,7 @@
 
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   int requested_size() const { return _requested_size; }
   int actual_size()    const { return _actual_size; }
@@ -128,7 +128,7 @@
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   // Subclasses should call the parent's yield() method
   // after having done any work specific to the subclass.
@@ -159,7 +159,7 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  YieldingFlexibleWorkGang(const char* name, int workers,
+  YieldingFlexibleWorkGang(const char* name, uint workers,
                            bool are_GC_task_threads);
 
   YieldingFlexibleGangTask* yielding_task() const {
@@ -168,7 +168,7 @@
     return (YieldingFlexibleGangTask*)task();
   }
   // Allocate a worker and return a pointer to it.
-  GangWorker* allocate_worker(int which);
+  GangWorker* allocate_worker(uint which);
 
   // Run a task; returns when the task is done, or the workers yield,
   // or the task is aborted, or the work gang is terminated via stop().
@@ -199,12 +199,12 @@
   void abort();
 
 private:
-  int _yielded_workers;
+  uint _yielded_workers;
   void wait_for_gang();
 
 public:
   // Accessors for fields
-  int yielded_workers() const {
+  uint yielded_workers() const {
     return _yielded_workers;
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7116216/LargeFrame.java	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,1329 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+public class LargeFrame {
+
+    public static void method_with_many_locals(Object r1, int r2, int r3, int r4, int r5, int r6, int r7, Object r8) {
+        int i00 = 0, i01 = 0, i02 = 0, i03 = 0, i04 = 0, i05 = 0, i06 = 0, i07 = 0, i08 = 0, i09 = 0;
+        int i10 = 0, i11 = 0, i12 = 0, i13 = 0, i14 = 0, i15 = 0, i16 = 0, i17 = 0, i18 = 0, i19 = 0;
+        int i20 = 0, i21 = 0, i22 = 0, i23 = 0, i24 = 0, i25 = 0, i26 = 0, i27 = 0, i28 = 0, i29 = 0;
+        int i30 = 0, i31 = 0, i32 = 0, i33 = 0, i34 = 0, i35 = 0, i36 = 0, i37 = 0, i38 = 0, i39 = 0;
+        int i40 = 0, i41 = 0, i42 = 0, i43 = 0, i44 = 0, i45 = 0, i46 = 0, i47 = 0, i48 = 0, i49 = 0;
+        int i50 = 0, i51 = 0, i52 = 0, i53 = 0, i54 = 0, i55 = 0, i56 = 0, i57 = 0, i58 = 0, i59 = 0;
+        int i60 = 0, i61 = 0, i62 = 0, i63 = 0, i64 = 0, i65 = 0, i66 = 0, i67 = 0, i68 = 0, i69 = 0;
+        int i70 = 0, i71 = 0, i72 = 0, i73 = 0, i74 = 0, i75 = 0, i76 = 0, i77 = 0, i78 = 0, i79 = 0;
+        int i80 = 0, i81 = 0, i82 = 0, i83 = 0, i84 = 0, i85 = 0, i86 = 0, i87 = 0, i88 = 0, i89 = 0;
+        int i90 = 0, i91 = 0, i92 = 0, i93 = 0, i94 = 0, i95 = 0, i96 = 0, i97 = 0, i98 = 0, i99 = 0;
+        int i100 = 0, i101 = 0, i102 = 0, i103 = 0, i104 = 0, i105 = 0, i106 = 0, i107 = 0, i108 = 0, i109 = 0;
+        int i110 = 0, i111 = 0, i112 = 0, i113 = 0, i114 = 0, i115 = 0, i116 = 0, i117 = 0, i118 = 0, i119 = 0;
+        int i120 = 0, i121 = 0, i122 = 0, i123 = 0, i124 = 0, i125 = 0, i126 = 0, i127 = 0, i128 = 0, i129 = 0;
+        int i130 = 0, i131 = 0, i132 = 0, i133 = 0, i134 = 0, i135 = 0, i136 = 0, i137 = 0, i138 = 0, i139 = 0;
+        int i140 = 0, i141 = 0, i142 = 0, i143 = 0, i144 = 0, i145 = 0, i146 = 0, i147 = 0, i148 = 0, i149 = 0;
+        int i150 = 0, i151 = 0, i152 = 0, i153 = 0, i154 = 0, i155 = 0, i156 = 0, i157 = 0, i158 = 0, i159 = 0;
+        int i160 = 0, i161 = 0, i162 = 0, i163 = 0, i164 = 0, i165 = 0, i166 = 0, i167 = 0, i168 = 0, i169 = 0;
+        int i170 = 0, i171 = 0, i172 = 0, i173 = 0, i174 = 0, i175 = 0, i176 = 0, i177 = 0, i178 = 0, i179 = 0;
+        int i180 = 0, i181 = 0, i182 = 0, i183 = 0, i184 = 0, i185 = 0, i186 = 0, i187 = 0, i188 = 0, i189 = 0;
+        int i190 = 0, i191 = 0, i192 = 0, i193 = 0, i194 = 0, i195 = 0, i196 = 0, i197 = 0, i198 = 0, i199 = 0;
+        int i200 = 0, i201 = 0, i202 = 0, i203 = 0, i204 = 0, i205 = 0, i206 = 0, i207 = 0, i208 = 0, i209 = 0;
+        int i210 = 0, i211 = 0, i212 = 0, i213 = 0, i214 = 0, i215 = 0, i216 = 0, i217 = 0, i218 = 0, i219 = 0;
+        int i220 = 0, i221 = 0, i222 = 0, i223 = 0, i224 = 0, i225 = 0, i226 = 0, i227 = 0, i228 = 0, i229 = 0;
+        int i230 = 0, i231 = 0, i232 = 0, i233 = 0, i234 = 0, i235 = 0, i236 = 0, i237 = 0, i238 = 0, i239 = 0;
+        int i240 = 0, i241 = 0, i242 = 0, i243 = 0, i244 = 0, i245 = 0, i246 = 0, i247 = 0, i248 = 0, i249 = 0;
+        int i250 = 0, i251 = 0, i252 = 0, i253 = 0, i254 = 0, i255 = 0, i256 = 0, i257 = 0, i258 = 0, i259 = 0;
+        int i260 = 0, i261 = 0, i262 = 0, i263 = 0, i264 = 0, i265 = 0, i266 = 0, i267 = 0, i268 = 0, i269 = 0;
+        int i270 = 0, i271 = 0, i272 = 0, i273 = 0, i274 = 0, i275 = 0, i276 = 0, i277 = 0, i278 = 0, i279 = 0;
+        int i280 = 0, i281 = 0, i282 = 0, i283 = 0, i284 = 0, i285 = 0, i286 = 0, i287 = 0, i288 = 0, i289 = 0;
+        int i290 = 0, i291 = 0, i292 = 0, i293 = 0, i294 = 0, i295 = 0, i296 = 0, i297 = 0, i298 = 0, i299 = 0;
+        int i300 = 0, i301 = 0, i302 = 0, i303 = 0, i304 = 0, i305 = 0, i306 = 0, i307 = 0, i308 = 0, i309 = 0;
+        int i310 = 0, i311 = 0, i312 = 0, i313 = 0, i314 = 0, i315 = 0, i316 = 0, i317 = 0, i318 = 0, i319 = 0;
+        int i320 = 0, i321 = 0, i322 = 0, i323 = 0, i324 = 0, i325 = 0, i326 = 0, i327 = 0, i328 = 0, i329 = 0;
+        int i330 = 0, i331 = 0, i332 = 0, i333 = 0, i334 = 0, i335 = 0, i336 = 0, i337 = 0, i338 = 0, i339 = 0;
+        int i340 = 0, i341 = 0, i342 = 0, i343 = 0, i344 = 0, i345 = 0, i346 = 0, i347 = 0, i348 = 0, i349 = 0;
+        int i350 = 0, i351 = 0, i352 = 0, i353 = 0, i354 = 0, i355 = 0, i356 = 0, i357 = 0, i358 = 0, i359 = 0;
+        int i360 = 0, i361 = 0, i362 = 0, i363 = 0, i364 = 0, i365 = 0, i366 = 0, i367 = 0, i368 = 0, i369 = 0;
+        int i370 = 0, i371 = 0, i372 = 0, i373 = 0, i374 = 0, i375 = 0, i376 = 0, i377 = 0, i378 = 0, i379 = 0;
+        int i380 = 0, i381 = 0, i382 = 0, i383 = 0, i384 = 0, i385 = 0, i386 = 0, i387 = 0, i388 = 0, i389 = 0;
+        int i390 = 0, i391 = 0, i392 = 0, i393 = 0, i394 = 0, i395 = 0, i396 = 0, i397 = 0, i398 = 0, i399 = 0;
+        int i400 = 0, i401 = 0, i402 = 0, i403 = 0, i404 = 0, i405 = 0, i406 = 0, i407 = 0, i408 = 0, i409 = 0;
+        int i410 = 0, i411 = 0, i412 = 0, i413 = 0, i414 = 0, i415 = 0, i416 = 0, i417 = 0, i418 = 0, i419 = 0;
+        int i420 = 0, i421 = 0, i422 = 0, i423 = 0, i424 = 0, i425 = 0, i426 = 0, i427 = 0, i428 = 0, i429 = 0;
+        int i430 = 0, i431 = 0, i432 = 0, i433 = 0, i434 = 0, i435 = 0, i436 = 0, i437 = 0, i438 = 0, i439 = 0;
+        int i440 = 0, i441 = 0, i442 = 0, i443 = 0, i444 = 0, i445 = 0, i446 = 0, i447 = 0, i448 = 0, i449 = 0;
+        int i450 = 0, i451 = 0, i452 = 0, i453 = 0, i454 = 0, i455 = 0, i456 = 0, i457 = 0, i458 = 0, i459 = 0;
+        int i460 = 0, i461 = 0, i462 = 0, i463 = 0, i464 = 0, i465 = 0, i466 = 0, i467 = 0, i468 = 0, i469 = 0;
+        int i470 = 0, i471 = 0, i472 = 0, i473 = 0, i474 = 0, i475 = 0, i476 = 0, i477 = 0, i478 = 0, i479 = 0;
+        int i480 = 0, i481 = 0, i482 = 0, i483 = 0, i484 = 0, i485 = 0, i486 = 0, i487 = 0, i488 = 0, i489 = 0;
+        int i490 = 0, i491 = 0, i492 = 0, i493 = 0, i494 = 0, i495 = 0, i496 = 0, i497 = 0, i498 = 0, i499 = 0;
+        int i500 = 0, i501 = 0, i502 = 0, i503 = 0, i504 = 0, i505 = 0, i506 = 0, i507 = 0, i508 = 0, i509 = 0;
+        int i510 = 0, i511 = 0, i512 = 0, i513 = 0, i514 = 0, i515 = 0, i516 = 0, i517 = 0, i518 = 0, i519 = 0;
+        int i520 = 0, i521 = 0, i522 = 0, i523 = 0, i524 = 0, i525 = 0, i526 = 0, i527 = 0, i528 = 0, i529 = 0;
+        int i530 = 0, i531 = 0, i532 = 0, i533 = 0, i534 = 0, i535 = 0, i536 = 0, i537 = 0, i538 = 0, i539 = 0;
+        int i540 = 0, i541 = 0, i542 = 0, i543 = 0, i544 = 0, i545 = 0, i546 = 0, i547 = 0, i548 = 0, i549 = 0;
+        int i550 = 0, i551 = 0, i552 = 0, i553 = 0, i554 = 0, i555 = 0, i556 = 0, i557 = 0, i558 = 0, i559 = 0;
+        int i560 = 0, i561 = 0, i562 = 0, i563 = 0, i564 = 0, i565 = 0, i566 = 0, i567 = 0, i568 = 0, i569 = 0;
+        int i570 = 0, i571 = 0, i572 = 0, i573 = 0, i574 = 0, i575 = 0, i576 = 0, i577 = 0, i578 = 0, i579 = 0;
+        int i580 = 0, i581 = 0, i582 = 0, i583 = 0, i584 = 0, i585 = 0, i586 = 0, i587 = 0, i588 = 0, i589 = 0;
+        int i590 = 0, i591 = 0, i592 = 0, i593 = 0, i594 = 0, i595 = 0, i596 = 0, i597 = 0, i598 = 0, i599 = 0;
+        int i600 = 0, i601 = 0, i602 = 0, i603 = 0, i604 = 0, i605 = 0, i606 = 0, i607 = 0, i608 = 0, i609 = 0;
+        int i610 = 0, i611 = 0, i612 = 0, i613 = 0, i614 = 0, i615 = 0, i616 = 0, i617 = 0, i618 = 0, i619 = 0;
+        int i620 = 0, i621 = 0, i622 = 0, i623 = 0, i624 = 0, i625 = 0, i626 = 0, i627 = 0, i628 = 0, i629 = 0;
+        int i630 = 0, i631 = 0, i632 = 0, i633 = 0, i634 = 0, i635 = 0, i636 = 0, i637 = 0, i638 = 0, i639 = 0;
+        int i640 = 0, i641 = 0, i642 = 0, i643 = 0, i644 = 0, i645 = 0, i646 = 0, i647 = 0, i648 = 0, i649 = 0;
+        int i650 = 0, i651 = 0, i652 = 0, i653 = 0, i654 = 0, i655 = 0, i656 = 0, i657 = 0, i658 = 0, i659 = 0;
+        int i660 = 0, i661 = 0, i662 = 0, i663 = 0, i664 = 0, i665 = 0, i666 = 0, i667 = 0, i668 = 0, i669 = 0;
+        int i670 = 0, i671 = 0, i672 = 0, i673 = 0, i674 = 0, i675 = 0, i676 = 0, i677 = 0, i678 = 0, i679 = 0;
+        int i680 = 0, i681 = 0, i682 = 0, i683 = 0, i684 = 0, i685 = 0, i686 = 0, i687 = 0, i688 = 0, i689 = 0;
+        int i690 = 0, i691 = 0, i692 = 0, i693 = 0, i694 = 0, i695 = 0, i696 = 0, i697 = 0, i698 = 0, i699 = 0;
+        int i700 = 0, i701 = 0, i702 = 0, i703 = 0, i704 = 0, i705 = 0, i706 = 0, i707 = 0, i708 = 0, i709 = 0;
+        int i710 = 0, i711 = 0, i712 = 0, i713 = 0, i714 = 0, i715 = 0, i716 = 0, i717 = 0, i718 = 0, i719 = 0;
+        int i720 = 0, i721 = 0, i722 = 0, i723 = 0, i724 = 0, i725 = 0, i726 = 0, i727 = 0, i728 = 0, i729 = 0;
+        int i730 = 0, i731 = 0, i732 = 0, i733 = 0, i734 = 0, i735 = 0, i736 = 0, i737 = 0, i738 = 0, i739 = 0;
+        int i740 = 0, i741 = 0, i742 = 0, i743 = 0, i744 = 0, i745 = 0, i746 = 0, i747 = 0, i748 = 0, i749 = 0;
+        int i750 = 0, i751 = 0, i752 = 0, i753 = 0, i754 = 0, i755 = 0, i756 = 0, i757 = 0, i758 = 0, i759 = 0;
+        int i760 = 0, i761 = 0, i762 = 0, i763 = 0, i764 = 0, i765 = 0, i766 = 0, i767 = 0, i768 = 0, i769 = 0;
+        int i770 = 0, i771 = 0, i772 = 0, i773 = 0, i774 = 0, i775 = 0, i776 = 0, i777 = 0, i778 = 0, i779 = 0;
+        int i780 = 0, i781 = 0, i782 = 0, i783 = 0, i784 = 0, i785 = 0, i786 = 0, i787 = 0, i788 = 0, i789 = 0;
+        int i790 = 0, i791 = 0, i792 = 0, i793 = 0, i794 = 0, i795 = 0, i796 = 0, i797 = 0, i798 = 0, i799 = 0;
+        int i800 = 0, i801 = 0, i802 = 0, i803 = 0, i804 = 0, i805 = 0, i806 = 0, i807 = 0, i808 = 0, i809 = 0;
+        int i810 = 0, i811 = 0, i812 = 0, i813 = 0, i814 = 0, i815 = 0, i816 = 0, i817 = 0, i818 = 0, i819 = 0;
+        int i820 = 0, i821 = 0, i822 = 0, i823 = 0, i824 = 0, i825 = 0, i826 = 0, i827 = 0, i828 = 0, i829 = 0;
+        int i830 = 0, i831 = 0, i832 = 0, i833 = 0, i834 = 0, i835 = 0, i836 = 0, i837 = 0, i838 = 0, i839 = 0;
+        int i840 = 0, i841 = 0, i842 = 0, i843 = 0, i844 = 0, i845 = 0, i846 = 0, i847 = 0, i848 = 0, i849 = 0;
+        int i850 = 0, i851 = 0, i852 = 0, i853 = 0, i854 = 0, i855 = 0, i856 = 0, i857 = 0, i858 = 0, i859 = 0;
+        int i860 = 0, i861 = 0, i862 = 0, i863 = 0, i864 = 0, i865 = 0, i866 = 0, i867 = 0, i868 = 0, i869 = 0;
+        int i870 = 0, i871 = 0, i872 = 0, i873 = 0, i874 = 0, i875 = 0, i876 = 0, i877 = 0, i878 = 0, i879 = 0;
+        int i880 = 0, i881 = 0, i882 = 0, i883 = 0, i884 = 0, i885 = 0, i886 = 0, i887 = 0, i888 = 0, i889 = 0;
+        int i890 = 0, i891 = 0, i892 = 0, i893 = 0, i894 = 0, i895 = 0, i896 = 0, i897 = 0, i898 = 0, i899 = 0;
+        int i900 = 0, i901 = 0, i902 = 0, i903 = 0, i904 = 0, i905 = 0, i906 = 0, i907 = 0, i908 = 0, i909 = 0;
+        int i910 = 0, i911 = 0, i912 = 0, i913 = 0, i914 = 0, i915 = 0, i916 = 0, i917 = 0, i918 = 0, i919 = 0;
+        int i920 = 0, i921 = 0, i922 = 0, i923 = 0, i924 = 0, i925 = 0, i926 = 0, i927 = 0, i928 = 0, i929 = 0;
+        int i930 = 0, i931 = 0, i932 = 0, i933 = 0, i934 = 0, i935 = 0, i936 = 0, i937 = 0, i938 = 0, i939 = 0;
+        int i940 = 0, i941 = 0, i942 = 0, i943 = 0, i944 = 0, i945 = 0, i946 = 0, i947 = 0, i948 = 0, i949 = 0;
+        int i950 = 0, i951 = 0, i952 = 0, i953 = 0, i954 = 0, i955 = 0, i956 = 0, i957 = 0, i958 = 0, i959 = 0;
+        int i960 = 0, i961 = 0, i962 = 0, i963 = 0, i964 = 0, i965 = 0, i966 = 0, i967 = 0, i968 = 0, i969 = 0;
+        int i970 = 0, i971 = 0, i972 = 0, i973 = 0, i974 = 0, i975 = 0, i976 = 0, i977 = 0, i978 = 0, i979 = 0;
+        int i980 = 0, i981 = 0, i982 = 0, i983 = 0, i984 = 0, i985 = 0, i986 = 0, i987 = 0, i988 = 0, i989 = 0;
+        int i990 = 0, i991 = 0, i992 = 0, i993 = 0, i994 = 0, i995 = 0, i996 = 0, i997 = 0, i998 = 0, i999 = 0;
+        int i1000 = 0, i1001 = 0, i1002 = 0, i1003 = 0, i1004 = 0, i1005 = 0, i1006 = 0, i1007 = 0, i1008 = 0, i1009 = 0;
+        int i1010 = 0, i1011 = 0, i1012 = 0, i1013 = 0, i1014 = 0, i1015 = 0, i1016 = 0, i1017 = 0, i1018 = 0, i1019 = 0;
+        int i1020 = 0, i1021 = 0, i1022 = 0, i1023 = 0, i1024 = 0, i1025 = 0, i1026 = 0, i1027 = 0, i1028 = 0, i1029 = 0;
+        int i1030 = 0, i1031 = 0, i1032 = 0, i1033 = 0, i1034 = 0, i1035 = 0, i1036 = 0, i1037 = 0, i1038 = 0, i1039 = 0;
+        int i1040 = 0, i1041 = 0, i1042 = 0, i1043 = 0, i1044 = 0, i1045 = 0, i1046 = 0, i1047 = 0, i1048 = 0, i1049 = 0;
+        int i1050 = 0, i1051 = 0, i1052 = 0, i1053 = 0, i1054 = 0, i1055 = 0, i1056 = 0, i1057 = 0, i1058 = 0, i1059 = 0;
+        int i1060 = 0, i1061 = 0, i1062 = 0, i1063 = 0, i1064 = 0, i1065 = 0, i1066 = 0, i1067 = 0, i1068 = 0, i1069 = 0;
+        int i1070 = 0, i1071 = 0, i1072 = 0, i1073 = 0, i1074 = 0, i1075 = 0, i1076 = 0, i1077 = 0, i1078 = 0, i1079 = 0;
+        int i1080 = 0, i1081 = 0, i1082 = 0, i1083 = 0, i1084 = 0, i1085 = 0, i1086 = 0, i1087 = 0, i1088 = 0, i1089 = 0;
+        int i1090 = 0, i1091 = 0, i1092 = 0, i1093 = 0, i1094 = 0, i1095 = 0, i1096 = 0, i1097 = 0, i1098 = 0, i1099 = 0;
+        int i1100 = 0, i1101 = 0, i1102 = 0, i1103 = 0, i1104 = 0, i1105 = 0, i1106 = 0, i1107 = 0, i1108 = 0, i1109 = 0;
+        int i1110 = 0, i1111 = 0, i1112 = 0, i1113 = 0, i1114 = 0, i1115 = 0, i1116 = 0, i1117 = 0, i1118 = 0, i1119 = 0;
+        int i1120 = 0, i1121 = 0, i1122 = 0, i1123 = 0, i1124 = 0, i1125 = 0, i1126 = 0, i1127 = 0, i1128 = 0, i1129 = 0;
+        int i1130 = 0, i1131 = 0, i1132 = 0, i1133 = 0, i1134 = 0, i1135 = 0, i1136 = 0, i1137 = 0, i1138 = 0, i1139 = 0;
+        int i1140 = 0, i1141 = 0, i1142 = 0, i1143 = 0, i1144 = 0, i1145 = 0, i1146 = 0, i1147 = 0, i1148 = 0, i1149 = 0;
+        int i1150 = 0, i1151 = 0, i1152 = 0, i1153 = 0, i1154 = 0, i1155 = 0, i1156 = 0, i1157 = 0, i1158 = 0, i1159 = 0;
+        int i1160 = 0, i1161 = 0, i1162 = 0, i1163 = 0, i1164 = 0, i1165 = 0, i1166 = 0, i1167 = 0, i1168 = 0, i1169 = 0;
+        int i1170 = 0, i1171 = 0, i1172 = 0, i1173 = 0, i1174 = 0, i1175 = 0, i1176 = 0, i1177 = 0, i1178 = 0, i1179 = 0;
+        int i1180 = 0, i1181 = 0, i1182 = 0, i1183 = 0, i1184 = 0, i1185 = 0, i1186 = 0, i1187 = 0, i1188 = 0, i1189 = 0;
+        int i1190 = 0, i1191 = 0, i1192 = 0, i1193 = 0, i1194 = 0, i1195 = 0, i1196 = 0, i1197 = 0, i1198 = 0, i1199 = 0;
+        int i1200 = 0, i1201 = 0, i1202 = 0, i1203 = 0, i1204 = 0, i1205 = 0, i1206 = 0, i1207 = 0, i1208 = 0, i1209 = 0;
+        int i1210 = 0, i1211 = 0, i1212 = 0, i1213 = 0, i1214 = 0, i1215 = 0, i1216 = 0, i1217 = 0, i1218 = 0, i1219 = 0;
+        int i1220 = 0, i1221 = 0, i1222 = 0, i1223 = 0, i1224 = 0, i1225 = 0, i1226 = 0, i1227 = 0, i1228 = 0, i1229 = 0;
+        int i1230 = 0, i1231 = 0, i1232 = 0, i1233 = 0, i1234 = 0, i1235 = 0, i1236 = 0, i1237 = 0, i1238 = 0, i1239 = 0;
+        int i1240 = 0, i1241 = 0, i1242 = 0, i1243 = 0, i1244 = 0, i1245 = 0, i1246 = 0, i1247 = 0, i1248 = 0, i1249 = 0;
+        int i1250 = 0, i1251 = 0, i1252 = 0, i1253 = 0, i1254 = 0, i1255 = 0, i1256 = 0, i1257 = 0, i1258 = 0, i1259 = 0;
+        int i1260 = 0, i1261 = 0, i1262 = 0, i1263 = 0, i1264 = 0, i1265 = 0, i1266 = 0, i1267 = 0, i1268 = 0, i1269 = 0;
+        int i1270 = 0, i1271 = 0, i1272 = 0, i1273 = 0, i1274 = 0, i1275 = 0, i1276 = 0, i1277 = 0, i1278 = 0, i1279 = 0;
+        int i1280 = 0, i1281 = 0, i1282 = 0, i1283 = 0, i1284 = 0, i1285 = 0, i1286 = 0, i1287 = 0, i1288 = 0, i1289 = 0;
+        int i1290 = 0, i1291 = 0, i1292 = 0, i1293 = 0, i1294 = 0, i1295 = 0, i1296 = 0, i1297 = 0, i1298 = 0, i1299 = 0;
+        int i1300 = 0, i1301 = 0, i1302 = 0, i1303 = 0, i1304 = 0, i1305 = 0, i1306 = 0, i1307 = 0, i1308 = 0, i1309 = 0;
+        int i1310 = 0, i1311 = 0, i1312 = 0, i1313 = 0, i1314 = 0, i1315 = 0, i1316 = 0, i1317 = 0, i1318 = 0, i1319 = 0;
+        int i1320 = 0, i1321 = 0, i1322 = 0, i1323 = 0, i1324 = 0, i1325 = 0, i1326 = 0, i1327 = 0, i1328 = 0, i1329 = 0;
+        int i1330 = 0, i1331 = 0, i1332 = 0, i1333 = 0, i1334 = 0, i1335 = 0, i1336 = 0, i1337 = 0, i1338 = 0, i1339 = 0;
+        int i1340 = 0, i1341 = 0, i1342 = 0, i1343 = 0, i1344 = 0, i1345 = 0, i1346 = 0, i1347 = 0, i1348 = 0, i1349 = 0;
+        int i1350 = 0, i1351 = 0, i1352 = 0, i1353 = 0, i1354 = 0, i1355 = 0, i1356 = 0, i1357 = 0, i1358 = 0, i1359 = 0;
+        int i1360 = 0, i1361 = 0, i1362 = 0, i1363 = 0, i1364 = 0, i1365 = 0, i1366 = 0, i1367 = 0, i1368 = 0, i1369 = 0;
+        int i1370 = 0, i1371 = 0, i1372 = 0, i1373 = 0, i1374 = 0, i1375 = 0, i1376 = 0, i1377 = 0, i1378 = 0, i1379 = 0;
+        int i1380 = 0, i1381 = 0, i1382 = 0, i1383 = 0, i1384 = 0, i1385 = 0, i1386 = 0, i1387 = 0, i1388 = 0, i1389 = 0;
+        int i1390 = 0, i1391 = 0, i1392 = 0, i1393 = 0, i1394 = 0, i1395 = 0, i1396 = 0, i1397 = 0, i1398 = 0, i1399 = 0;
+        int i1400 = 0, i1401 = 0, i1402 = 0, i1403 = 0, i1404 = 0, i1405 = 0, i1406 = 0, i1407 = 0, i1408 = 0, i1409 = 0;
+        int i1410 = 0, i1411 = 0, i1412 = 0, i1413 = 0, i1414 = 0, i1415 = 0, i1416 = 0, i1417 = 0, i1418 = 0, i1419 = 0;
+        int i1420 = 0, i1421 = 0, i1422 = 0, i1423 = 0, i1424 = 0, i1425 = 0, i1426 = 0, i1427 = 0, i1428 = 0, i1429 = 0;
+        int i1430 = 0, i1431 = 0, i1432 = 0, i1433 = 0, i1434 = 0, i1435 = 0, i1436 = 0, i1437 = 0, i1438 = 0, i1439 = 0;
+        int i1440 = 0, i1441 = 0, i1442 = 0, i1443 = 0, i1444 = 0, i1445 = 0, i1446 = 0, i1447 = 0, i1448 = 0, i1449 = 0;
+        int i1450 = 0, i1451 = 0, i1452 = 0, i1453 = 0, i1454 = 0, i1455 = 0, i1456 = 0, i1457 = 0, i1458 = 0, i1459 = 0;
+        int i1460 = 0, i1461 = 0, i1462 = 0, i1463 = 0, i1464 = 0, i1465 = 0, i1466 = 0, i1467 = 0, i1468 = 0, i1469 = 0;
+        int i1470 = 0, i1471 = 0, i1472 = 0, i1473 = 0, i1474 = 0, i1475 = 0, i1476 = 0, i1477 = 0, i1478 = 0, i1479 = 0;
+        int i1480 = 0, i1481 = 0, i1482 = 0, i1483 = 0, i1484 = 0, i1485 = 0, i1486 = 0, i1487 = 0, i1488 = 0, i1489 = 0;
+        int i1490 = 0, i1491 = 0, i1492 = 0, i1493 = 0, i1494 = 0, i1495 = 0, i1496 = 0, i1497 = 0, i1498 = 0, i1499 = 0;
+        int i1500 = 0, i1501 = 0, i1502 = 0, i1503 = 0, i1504 = 0, i1505 = 0, i1506 = 0, i1507 = 0, i1508 = 0, i1509 = 0;
+        int i1510 = 0, i1511 = 0, i1512 = 0, i1513 = 0, i1514 = 0, i1515 = 0, i1516 = 0, i1517 = 0, i1518 = 0, i1519 = 0;
+        int i1520 = 0, i1521 = 0, i1522 = 0, i1523 = 0, i1524 = 0, i1525 = 0, i1526 = 0, i1527 = 0, i1528 = 0, i1529 = 0;
+        int i1530 = 0, i1531 = 0, i1532 = 0, i1533 = 0, i1534 = 0, i1535 = 0, i1536 = 0, i1537 = 0, i1538 = 0, i1539 = 0;
+        int i1540 = 0, i1541 = 0, i1542 = 0, i1543 = 0, i1544 = 0, i1545 = 0, i1546 = 0, i1547 = 0, i1548 = 0, i1549 = 0;
+        int i1550 = 0, i1551 = 0, i1552 = 0, i1553 = 0, i1554 = 0, i1555 = 0, i1556 = 0, i1557 = 0, i1558 = 0, i1559 = 0;
+        int i1560 = 0, i1561 = 0, i1562 = 0, i1563 = 0, i1564 = 0, i1565 = 0, i1566 = 0, i1567 = 0, i1568 = 0, i1569 = 0;
+        int i1570 = 0, i1571 = 0, i1572 = 0, i1573 = 0, i1574 = 0, i1575 = 0, i1576 = 0, i1577 = 0, i1578 = 0, i1579 = 0;
+        int i1580 = 0, i1581 = 0, i1582 = 0, i1583 = 0, i1584 = 0, i1585 = 0, i1586 = 0, i1587 = 0, i1588 = 0, i1589 = 0;
+        int i1590 = 0, i1591 = 0, i1592 = 0, i1593 = 0, i1594 = 0, i1595 = 0, i1596 = 0, i1597 = 0, i1598 = 0, i1599 = 0;
+        int i1600 = 0, i1601 = 0, i1602 = 0, i1603 = 0, i1604 = 0, i1605 = 0, i1606 = 0, i1607 = 0, i1608 = 0, i1609 = 0;
+        int i1610 = 0, i1611 = 0, i1612 = 0, i1613 = 0, i1614 = 0, i1615 = 0, i1616 = 0, i1617 = 0, i1618 = 0, i1619 = 0;
+        int i1620 = 0, i1621 = 0, i1622 = 0, i1623 = 0, i1624 = 0, i1625 = 0, i1626 = 0, i1627 = 0, i1628 = 0, i1629 = 0;
+        int i1630 = 0, i1631 = 0, i1632 = 0, i1633 = 0, i1634 = 0, i1635 = 0, i1636 = 0, i1637 = 0, i1638 = 0, i1639 = 0;
+        int i1640 = 0, i1641 = 0, i1642 = 0, i1643 = 0, i1644 = 0, i1645 = 0, i1646 = 0, i1647 = 0, i1648 = 0, i1649 = 0;
+        int i1650 = 0, i1651 = 0, i1652 = 0, i1653 = 0, i1654 = 0, i1655 = 0, i1656 = 0, i1657 = 0, i1658 = 0, i1659 = 0;
+        int i1660 = 0, i1661 = 0, i1662 = 0, i1663 = 0, i1664 = 0, i1665 = 0, i1666 = 0, i1667 = 0, i1668 = 0, i1669 = 0;
+        int i1670 = 0, i1671 = 0, i1672 = 0, i1673 = 0, i1674 = 0, i1675 = 0, i1676 = 0, i1677 = 0, i1678 = 0, i1679 = 0;
+        int i1680 = 0, i1681 = 0, i1682 = 0, i1683 = 0, i1684 = 0, i1685 = 0, i1686 = 0, i1687 = 0, i1688 = 0, i1689 = 0;
+        int i1690 = 0, i1691 = 0, i1692 = 0, i1693 = 0, i1694 = 0, i1695 = 0, i1696 = 0, i1697 = 0, i1698 = 0, i1699 = 0;
+        int i1700 = 0, i1701 = 0, i1702 = 0, i1703 = 0, i1704 = 0, i1705 = 0, i1706 = 0, i1707 = 0, i1708 = 0, i1709 = 0;
+        int i1710 = 0, i1711 = 0, i1712 = 0, i1713 = 0, i1714 = 0, i1715 = 0, i1716 = 0, i1717 = 0, i1718 = 0, i1719 = 0;
+        int i1720 = 0, i1721 = 0, i1722 = 0, i1723 = 0, i1724 = 0, i1725 = 0, i1726 = 0, i1727 = 0, i1728 = 0, i1729 = 0;
+        int i1730 = 0, i1731 = 0, i1732 = 0, i1733 = 0, i1734 = 0, i1735 = 0, i1736 = 0, i1737 = 0, i1738 = 0, i1739 = 0;
+        int i1740 = 0, i1741 = 0, i1742 = 0, i1743 = 0, i1744 = 0, i1745 = 0, i1746 = 0, i1747 = 0, i1748 = 0, i1749 = 0;
+        int i1750 = 0, i1751 = 0, i1752 = 0, i1753 = 0, i1754 = 0, i1755 = 0, i1756 = 0, i1757 = 0, i1758 = 0, i1759 = 0;
+        int i1760 = 0, i1761 = 0, i1762 = 0, i1763 = 0, i1764 = 0, i1765 = 0, i1766 = 0, i1767 = 0, i1768 = 0, i1769 = 0;
+        int i1770 = 0, i1771 = 0, i1772 = 0, i1773 = 0, i1774 = 0, i1775 = 0, i1776 = 0, i1777 = 0, i1778 = 0, i1779 = 0;
+        int i1780 = 0, i1781 = 0, i1782 = 0, i1783 = 0, i1784 = 0, i1785 = 0, i1786 = 0, i1787 = 0, i1788 = 0, i1789 = 0;
+        int i1790 = 0, i1791 = 0, i1792 = 0, i1793 = 0, i1794 = 0, i1795 = 0, i1796 = 0, i1797 = 0, i1798 = 0, i1799 = 0;
+        int i1800 = 0, i1801 = 0, i1802 = 0, i1803 = 0, i1804 = 0, i1805 = 0, i1806 = 0, i1807 = 0, i1808 = 0, i1809 = 0;
+        int i1810 = 0, i1811 = 0, i1812 = 0, i1813 = 0, i1814 = 0, i1815 = 0, i1816 = 0, i1817 = 0, i1818 = 0, i1819 = 0;
+        int i1820 = 0, i1821 = 0, i1822 = 0, i1823 = 0, i1824 = 0, i1825 = 0, i1826 = 0, i1827 = 0, i1828 = 0, i1829 = 0;
+        int i1830 = 0, i1831 = 0, i1832 = 0, i1833 = 0, i1834 = 0, i1835 = 0, i1836 = 0, i1837 = 0, i1838 = 0, i1839 = 0;
+        int i1840 = 0, i1841 = 0, i1842 = 0, i1843 = 0, i1844 = 0, i1845 = 0, i1846 = 0, i1847 = 0, i1848 = 0, i1849 = 0;
+        int i1850 = 0, i1851 = 0, i1852 = 0, i1853 = 0, i1854 = 0, i1855 = 0, i1856 = 0, i1857 = 0, i1858 = 0, i1859 = 0;
+        int i1860 = 0, i1861 = 0, i1862 = 0, i1863 = 0, i1864 = 0, i1865 = 0, i1866 = 0, i1867 = 0, i1868 = 0, i1869 = 0;
+        int i1870 = 0, i1871 = 0, i1872 = 0, i1873 = 0, i1874 = 0, i1875 = 0, i1876 = 0, i1877 = 0, i1878 = 0, i1879 = 0;
+        int i1880 = 0, i1881 = 0, i1882 = 0, i1883 = 0, i1884 = 0, i1885 = 0, i1886 = 0, i1887 = 0, i1888 = 0, i1889 = 0;
+        int i1890 = 0, i1891 = 0, i1892 = 0, i1893 = 0, i1894 = 0, i1895 = 0, i1896 = 0, i1897 = 0, i1898 = 0, i1899 = 0;
+        int i1900 = 0, i1901 = 0, i1902 = 0, i1903 = 0, i1904 = 0, i1905 = 0, i1906 = 0, i1907 = 0, i1908 = 0, i1909 = 0;
+        int i1910 = 0, i1911 = 0, i1912 = 0, i1913 = 0, i1914 = 0, i1915 = 0, i1916 = 0, i1917 = 0, i1918 = 0, i1919 = 0;
+        int i1920 = 0, i1921 = 0, i1922 = 0, i1923 = 0, i1924 = 0, i1925 = 0, i1926 = 0, i1927 = 0, i1928 = 0, i1929 = 0;
+        int i1930 = 0, i1931 = 0, i1932 = 0, i1933 = 0, i1934 = 0, i1935 = 0, i1936 = 0, i1937 = 0, i1938 = 0, i1939 = 0;
+        int i1940 = 0, i1941 = 0, i1942 = 0, i1943 = 0, i1944 = 0, i1945 = 0, i1946 = 0, i1947 = 0, i1948 = 0, i1949 = 0;
+        int i1950 = 0, i1951 = 0, i1952 = 0, i1953 = 0, i1954 = 0, i1955 = 0, i1956 = 0, i1957 = 0, i1958 = 0, i1959 = 0;
+        int i1960 = 0, i1961 = 0, i1962 = 0, i1963 = 0, i1964 = 0, i1965 = 0, i1966 = 0, i1967 = 0, i1968 = 0, i1969 = 0;
+        int i1970 = 0, i1971 = 0, i1972 = 0, i1973 = 0, i1974 = 0, i1975 = 0, i1976 = 0, i1977 = 0, i1978 = 0, i1979 = 0;
+        int i1980 = 0, i1981 = 0, i1982 = 0, i1983 = 0, i1984 = 0, i1985 = 0, i1986 = 0, i1987 = 0, i1988 = 0, i1989 = 0;
+        int i1990 = 0, i1991 = 0, i1992 = 0, i1993 = 0, i1994 = 0, i1995 = 0, i1996 = 0, i1997 = 0, i1998 = 0, i1999 = 0;
+        int i2000 = 0, i2001 = 0, i2002 = 0, i2003 = 0, i2004 = 0, i2005 = 0, i2006 = 0, i2007 = 0, i2008 = 0, i2009 = 0;
+        int i2010 = 0, i2011 = 0, i2012 = 0, i2013 = 0, i2014 = 0, i2015 = 0, i2016 = 0, i2017 = 0, i2018 = 0, i2019 = 0;
+        int i2020 = 0, i2021 = 0, i2022 = 0, i2023 = 0, i2024 = 0, i2025 = 0, i2026 = 0, i2027 = 0, i2028 = 0, i2029 = 0;
+        int i2030 = 0, i2031 = 0, i2032 = 0, i2033 = 0, i2034 = 0, i2035 = 0, i2036 = 0, i2037 = 0, i2038 = 0, i2039 = 0;
+        int i2040 = 0, i2041 = 0, i2042 = 0, i2043 = 0, i2044 = 0, i2045 = 0, i2046 = 0, i2047 = 0, i2048 = 0, i2049 = 0;
+        int i2050 = 0, i2051 = 0, i2052 = 0, i2053 = 0, i2054 = 0, i2055 = 0, i2056 = 0, i2057 = 0, i2058 = 0, i2059 = 0;
+        int i2060 = 0, i2061 = 0, i2062 = 0, i2063 = 0, i2064 = 0, i2065 = 0, i2066 = 0, i2067 = 0, i2068 = 0, i2069 = 0;
+        int i2070 = 0, i2071 = 0, i2072 = 0, i2073 = 0, i2074 = 0, i2075 = 0, i2076 = 0, i2077 = 0, i2078 = 0, i2079 = 0;
+        int i2080 = 0, i2081 = 0, i2082 = 0, i2083 = 0, i2084 = 0, i2085 = 0, i2086 = 0, i2087 = 0, i2088 = 0, i2089 = 0;
+        int i2090 = 0, i2091 = 0, i2092 = 0, i2093 = 0, i2094 = 0, i2095 = 0, i2096 = 0, i2097 = 0, i2098 = 0, i2099 = 0;
+        int i2100 = 0, i2101 = 0, i2102 = 0, i2103 = 0, i2104 = 0, i2105 = 0, i2106 = 0, i2107 = 0, i2108 = 0, i2109 = 0;
+        int i2110 = 0, i2111 = 0, i2112 = 0, i2113 = 0, i2114 = 0, i2115 = 0, i2116 = 0, i2117 = 0, i2118 = 0, i2119 = 0;
+        int i2120 = 0, i2121 = 0, i2122 = 0, i2123 = 0, i2124 = 0, i2125 = 0, i2126 = 0, i2127 = 0, i2128 = 0, i2129 = 0;
+        int i2130 = 0, i2131 = 0, i2132 = 0, i2133 = 0, i2134 = 0, i2135 = 0, i2136 = 0, i2137 = 0, i2138 = 0, i2139 = 0;
+        int i2140 = 0, i2141 = 0, i2142 = 0, i2143 = 0, i2144 = 0, i2145 = 0, i2146 = 0, i2147 = 0, i2148 = 0, i2149 = 0;
+        int i2150 = 0, i2151 = 0, i2152 = 0, i2153 = 0, i2154 = 0, i2155 = 0, i2156 = 0, i2157 = 0, i2158 = 0, i2159 = 0;
+        int i2160 = 0, i2161 = 0, i2162 = 0, i2163 = 0, i2164 = 0, i2165 = 0, i2166 = 0, i2167 = 0, i2168 = 0, i2169 = 0;
+        int i2170 = 0, i2171 = 0, i2172 = 0, i2173 = 0, i2174 = 0, i2175 = 0, i2176 = 0, i2177 = 0, i2178 = 0, i2179 = 0;
+        int i2180 = 0, i2181 = 0, i2182 = 0, i2183 = 0, i2184 = 0, i2185 = 0, i2186 = 0, i2187 = 0, i2188 = 0, i2189 = 0;
+        int i2190 = 0, i2191 = 0, i2192 = 0, i2193 = 0, i2194 = 0, i2195 = 0, i2196 = 0, i2197 = 0, i2198 = 0, i2199 = 0;
+        int i2200 = 0, i2201 = 0, i2202 = 0, i2203 = 0, i2204 = 0, i2205 = 0, i2206 = 0, i2207 = 0, i2208 = 0, i2209 = 0;
+        int i2210 = 0, i2211 = 0, i2212 = 0, i2213 = 0, i2214 = 0, i2215 = 0, i2216 = 0, i2217 = 0, i2218 = 0, i2219 = 0;
+        int i2220 = 0, i2221 = 0, i2222 = 0, i2223 = 0, i2224 = 0, i2225 = 0, i2226 = 0, i2227 = 0, i2228 = 0, i2229 = 0;
+        int i2230 = 0, i2231 = 0, i2232 = 0, i2233 = 0, i2234 = 0, i2235 = 0, i2236 = 0, i2237 = 0, i2238 = 0, i2239 = 0;
+        int i2240 = 0, i2241 = 0, i2242 = 0, i2243 = 0, i2244 = 0, i2245 = 0, i2246 = 0, i2247 = 0, i2248 = 0, i2249 = 0;
+        int i2250 = 0, i2251 = 0, i2252 = 0, i2253 = 0, i2254 = 0, i2255 = 0, i2256 = 0, i2257 = 0, i2258 = 0, i2259 = 0;
+        int i2260 = 0, i2261 = 0, i2262 = 0, i2263 = 0, i2264 = 0, i2265 = 0, i2266 = 0, i2267 = 0, i2268 = 0, i2269 = 0;
+        int i2270 = 0, i2271 = 0, i2272 = 0, i2273 = 0, i2274 = 0, i2275 = 0, i2276 = 0, i2277 = 0, i2278 = 0, i2279 = 0;
+        int i2280 = 0, i2281 = 0, i2282 = 0, i2283 = 0, i2284 = 0, i2285 = 0, i2286 = 0, i2287 = 0, i2288 = 0, i2289 = 0;
+        int i2290 = 0, i2291 = 0, i2292 = 0, i2293 = 0, i2294 = 0, i2295 = 0, i2296 = 0, i2297 = 0, i2298 = 0, i2299 = 0;
+        int i2300 = 0, i2301 = 0, i2302 = 0, i2303 = 0, i2304 = 0, i2305 = 0, i2306 = 0, i2307 = 0, i2308 = 0, i2309 = 0;
+        int i2310 = 0, i2311 = 0, i2312 = 0, i2313 = 0, i2314 = 0, i2315 = 0, i2316 = 0, i2317 = 0, i2318 = 0, i2319 = 0;
+        int i2320 = 0, i2321 = 0, i2322 = 0, i2323 = 0, i2324 = 0, i2325 = 0, i2326 = 0, i2327 = 0, i2328 = 0, i2329 = 0;
+        int i2330 = 0, i2331 = 0, i2332 = 0, i2333 = 0, i2334 = 0, i2335 = 0, i2336 = 0, i2337 = 0, i2338 = 0, i2339 = 0;
+        int i2340 = 0, i2341 = 0, i2342 = 0, i2343 = 0, i2344 = 0, i2345 = 0, i2346 = 0, i2347 = 0, i2348 = 0, i2349 = 0;
+        int i2350 = 0, i2351 = 0, i2352 = 0, i2353 = 0, i2354 = 0, i2355 = 0, i2356 = 0, i2357 = 0, i2358 = 0, i2359 = 0;
+        int i2360 = 0, i2361 = 0, i2362 = 0, i2363 = 0, i2364 = 0, i2365 = 0, i2366 = 0, i2367 = 0, i2368 = 0, i2369 = 0;
+        int i2370 = 0, i2371 = 0, i2372 = 0, i2373 = 0, i2374 = 0, i2375 = 0, i2376 = 0, i2377 = 0, i2378 = 0, i2379 = 0;
+        int i2380 = 0, i2381 = 0, i2382 = 0, i2383 = 0, i2384 = 0, i2385 = 0, i2386 = 0, i2387 = 0, i2388 = 0, i2389 = 0;
+        int i2390 = 0, i2391 = 0, i2392 = 0, i2393 = 0, i2394 = 0, i2395 = 0, i2396 = 0, i2397 = 0, i2398 = 0, i2399 = 0;
+        int i2400 = 0, i2401 = 0, i2402 = 0, i2403 = 0, i2404 = 0, i2405 = 0, i2406 = 0, i2407 = 0, i2408 = 0, i2409 = 0;
+        int i2410 = 0, i2411 = 0, i2412 = 0, i2413 = 0, i2414 = 0, i2415 = 0, i2416 = 0, i2417 = 0, i2418 = 0, i2419 = 0;
+        int i2420 = 0, i2421 = 0, i2422 = 0, i2423 = 0, i2424 = 0, i2425 = 0, i2426 = 0, i2427 = 0, i2428 = 0, i2429 = 0;
+        int i2430 = 0, i2431 = 0, i2432 = 0, i2433 = 0, i2434 = 0, i2435 = 0, i2436 = 0, i2437 = 0, i2438 = 0, i2439 = 0;
+        int i2440 = 0, i2441 = 0, i2442 = 0, i2443 = 0, i2444 = 0, i2445 = 0, i2446 = 0, i2447 = 0, i2448 = 0, i2449 = 0;
+        int i2450 = 0, i2451 = 0, i2452 = 0, i2453 = 0, i2454 = 0, i2455 = 0, i2456 = 0, i2457 = 0, i2458 = 0, i2459 = 0;
+        int i2460 = 0, i2461 = 0, i2462 = 0, i2463 = 0, i2464 = 0, i2465 = 0, i2466 = 0, i2467 = 0, i2468 = 0, i2469 = 0;
+        int i2470 = 0, i2471 = 0, i2472 = 0, i2473 = 0, i2474 = 0, i2475 = 0, i2476 = 0, i2477 = 0, i2478 = 0, i2479 = 0;
+        int i2480 = 0, i2481 = 0, i2482 = 0, i2483 = 0, i2484 = 0, i2485 = 0, i2486 = 0, i2487 = 0, i2488 = 0, i2489 = 0;
+        int i2490 = 0, i2491 = 0, i2492 = 0, i2493 = 0, i2494 = 0, i2495 = 0, i2496 = 0, i2497 = 0, i2498 = 0, i2499 = 0;
+        int i2500 = 0, i2501 = 0, i2502 = 0, i2503 = 0, i2504 = 0, i2505 = 0, i2506 = 0, i2507 = 0, i2508 = 0, i2509 = 0;
+        int i2510 = 0, i2511 = 0, i2512 = 0, i2513 = 0, i2514 = 0, i2515 = 0, i2516 = 0, i2517 = 0, i2518 = 0, i2519 = 0;
+        int i2520 = 0, i2521 = 0, i2522 = 0, i2523 = 0, i2524 = 0, i2525 = 0, i2526 = 0, i2527 = 0, i2528 = 0, i2529 = 0;
+        int i2530 = 0, i2531 = 0, i2532 = 0, i2533 = 0, i2534 = 0, i2535 = 0, i2536 = 0, i2537 = 0, i2538 = 0, i2539 = 0;
+        int i2540 = 0, i2541 = 0, i2542 = 0, i2543 = 0, i2544 = 0, i2545 = 0, i2546 = 0, i2547 = 0, i2548 = 0, i2549 = 0;
+        int i2550 = 0, i2551 = 0, i2552 = 0, i2553 = 0, i2554 = 0, i2555 = 0, i2556 = 0, i2557 = 0, i2558 = 0, i2559 = 0;
+        int i2560 = 0, i2561 = 0, i2562 = 0, i2563 = 0, i2564 = 0, i2565 = 0, i2566 = 0, i2567 = 0, i2568 = 0, i2569 = 0;
+        int i2570 = 0, i2571 = 0, i2572 = 0, i2573 = 0, i2574 = 0, i2575 = 0, i2576 = 0, i2577 = 0, i2578 = 0, i2579 = 0;
+        int i2580 = 0, i2581 = 0, i2582 = 0, i2583 = 0, i2584 = 0, i2585 = 0, i2586 = 0, i2587 = 0, i2588 = 0, i2589 = 0;
+        int i2590 = 0, i2591 = 0, i2592 = 0, i2593 = 0, i2594 = 0, i2595 = 0, i2596 = 0, i2597 = 0, i2598 = 0, i2599 = 0;
+        int i2600 = 0, i2601 = 0, i2602 = 0, i2603 = 0, i2604 = 0, i2605 = 0, i2606 = 0, i2607 = 0, i2608 = 0, i2609 = 0;
+        int i2610 = 0, i2611 = 0, i2612 = 0, i2613 = 0, i2614 = 0, i2615 = 0, i2616 = 0, i2617 = 0, i2618 = 0, i2619 = 0;
+        int i2620 = 0, i2621 = 0, i2622 = 0, i2623 = 0, i2624 = 0, i2625 = 0, i2626 = 0, i2627 = 0, i2628 = 0, i2629 = 0;
+        int i2630 = 0, i2631 = 0, i2632 = 0, i2633 = 0, i2634 = 0, i2635 = 0, i2636 = 0, i2637 = 0, i2638 = 0, i2639 = 0;
+        int i2640 = 0, i2641 = 0, i2642 = 0, i2643 = 0, i2644 = 0, i2645 = 0, i2646 = 0, i2647 = 0, i2648 = 0, i2649 = 0;
+        int i2650 = 0, i2651 = 0, i2652 = 0, i2653 = 0, i2654 = 0, i2655 = 0, i2656 = 0, i2657 = 0, i2658 = 0, i2659 = 0;
+        int i2660 = 0, i2661 = 0, i2662 = 0, i2663 = 0, i2664 = 0, i2665 = 0, i2666 = 0, i2667 = 0, i2668 = 0, i2669 = 0;
+        int i2670 = 0, i2671 = 0, i2672 = 0, i2673 = 0, i2674 = 0, i2675 = 0, i2676 = 0, i2677 = 0, i2678 = 0, i2679 = 0;
+        int i2680 = 0, i2681 = 0, i2682 = 0, i2683 = 0, i2684 = 0, i2685 = 0, i2686 = 0, i2687 = 0, i2688 = 0, i2689 = 0;
+        int i2690 = 0, i2691 = 0, i2692 = 0, i2693 = 0, i2694 = 0, i2695 = 0, i2696 = 0, i2697 = 0, i2698 = 0, i2699 = 0;
+        int i2700 = 0, i2701 = 0, i2702 = 0, i2703 = 0, i2704 = 0, i2705 = 0, i2706 = 0, i2707 = 0, i2708 = 0, i2709 = 0;
+        int i2710 = 0, i2711 = 0, i2712 = 0, i2713 = 0, i2714 = 0, i2715 = 0, i2716 = 0, i2717 = 0, i2718 = 0, i2719 = 0;
+        int i2720 = 0, i2721 = 0, i2722 = 0, i2723 = 0, i2724 = 0, i2725 = 0, i2726 = 0, i2727 = 0, i2728 = 0, i2729 = 0;
+        int i2730 = 0, i2731 = 0, i2732 = 0, i2733 = 0, i2734 = 0, i2735 = 0, i2736 = 0, i2737 = 0, i2738 = 0, i2739 = 0;
+        int i2740 = 0, i2741 = 0, i2742 = 0, i2743 = 0, i2744 = 0, i2745 = 0, i2746 = 0, i2747 = 0, i2748 = 0, i2749 = 0;
+        int i2750 = 0, i2751 = 0, i2752 = 0, i2753 = 0, i2754 = 0, i2755 = 0, i2756 = 0, i2757 = 0, i2758 = 0, i2759 = 0;
+        int i2760 = 0, i2761 = 0, i2762 = 0, i2763 = 0, i2764 = 0, i2765 = 0, i2766 = 0, i2767 = 0, i2768 = 0, i2769 = 0;
+        int i2770 = 0, i2771 = 0, i2772 = 0, i2773 = 0, i2774 = 0, i2775 = 0, i2776 = 0, i2777 = 0, i2778 = 0, i2779 = 0;
+        int i2780 = 0, i2781 = 0, i2782 = 0, i2783 = 0, i2784 = 0, i2785 = 0, i2786 = 0, i2787 = 0, i2788 = 0, i2789 = 0;
+        int i2790 = 0, i2791 = 0, i2792 = 0, i2793 = 0, i2794 = 0, i2795 = 0, i2796 = 0, i2797 = 0, i2798 = 0, i2799 = 0;
+        int i2800 = 0, i2801 = 0, i2802 = 0, i2803 = 0, i2804 = 0, i2805 = 0, i2806 = 0, i2807 = 0, i2808 = 0, i2809 = 0;
+        int i2810 = 0, i2811 = 0, i2812 = 0, i2813 = 0, i2814 = 0, i2815 = 0, i2816 = 0, i2817 = 0, i2818 = 0, i2819 = 0;
+        int i2820 = 0, i2821 = 0, i2822 = 0, i2823 = 0, i2824 = 0, i2825 = 0, i2826 = 0, i2827 = 0, i2828 = 0, i2829 = 0;
+        int i2830 = 0, i2831 = 0, i2832 = 0, i2833 = 0, i2834 = 0, i2835 = 0, i2836 = 0, i2837 = 0, i2838 = 0, i2839 = 0;
+        int i2840 = 0, i2841 = 0, i2842 = 0, i2843 = 0, i2844 = 0, i2845 = 0, i2846 = 0, i2847 = 0, i2848 = 0, i2849 = 0;
+        int i2850 = 0, i2851 = 0, i2852 = 0, i2853 = 0, i2854 = 0, i2855 = 0, i2856 = 0, i2857 = 0, i2858 = 0, i2859 = 0;
+        int i2860 = 0, i2861 = 0, i2862 = 0, i2863 = 0, i2864 = 0, i2865 = 0, i2866 = 0, i2867 = 0, i2868 = 0, i2869 = 0;
+        int i2870 = 0, i2871 = 0, i2872 = 0, i2873 = 0, i2874 = 0, i2875 = 0, i2876 = 0, i2877 = 0, i2878 = 0, i2879 = 0;
+        int i2880 = 0, i2881 = 0, i2882 = 0, i2883 = 0, i2884 = 0, i2885 = 0, i2886 = 0, i2887 = 0, i2888 = 0, i2889 = 0;
+        int i2890 = 0, i2891 = 0, i2892 = 0, i2893 = 0, i2894 = 0, i2895 = 0, i2896 = 0, i2897 = 0, i2898 = 0, i2899 = 0;
+        int i2900 = 0, i2901 = 0, i2902 = 0, i2903 = 0, i2904 = 0, i2905 = 0, i2906 = 0, i2907 = 0, i2908 = 0, i2909 = 0;
+        int i2910 = 0, i2911 = 0, i2912 = 0, i2913 = 0, i2914 = 0, i2915 = 0, i2916 = 0, i2917 = 0, i2918 = 0, i2919 = 0;
+        int i2920 = 0, i2921 = 0, i2922 = 0, i2923 = 0, i2924 = 0, i2925 = 0, i2926 = 0, i2927 = 0, i2928 = 0, i2929 = 0;
+        int i2930 = 0, i2931 = 0, i2932 = 0, i2933 = 0, i2934 = 0, i2935 = 0, i2936 = 0, i2937 = 0, i2938 = 0, i2939 = 0;
+        int i2940 = 0, i2941 = 0, i2942 = 0, i2943 = 0, i2944 = 0, i2945 = 0, i2946 = 0, i2947 = 0, i2948 = 0, i2949 = 0;
+        int i2950 = 0, i2951 = 0, i2952 = 0, i2953 = 0, i2954 = 0, i2955 = 0, i2956 = 0, i2957 = 0, i2958 = 0, i2959 = 0;
+        int i2960 = 0, i2961 = 0, i2962 = 0, i2963 = 0, i2964 = 0, i2965 = 0, i2966 = 0, i2967 = 0, i2968 = 0, i2969 = 0;
+        int i2970 = 0, i2971 = 0, i2972 = 0, i2973 = 0, i2974 = 0, i2975 = 0, i2976 = 0, i2977 = 0, i2978 = 0, i2979 = 0;
+        int i2980 = 0, i2981 = 0, i2982 = 0, i2983 = 0, i2984 = 0, i2985 = 0, i2986 = 0, i2987 = 0, i2988 = 0, i2989 = 0;
+        int i2990 = 0, i2991 = 0, i2992 = 0, i2993 = 0, i2994 = 0, i2995 = 0, i2996 = 0, i2997 = 0, i2998 = 0, i2999 = 0;
+        int i3000 = 0, i3001 = 0, i3002 = 0, i3003 = 0, i3004 = 0, i3005 = 0, i3006 = 0, i3007 = 0, i3008 = 0, i3009 = 0;
+        int i3010 = 0, i3011 = 0, i3012 = 0, i3013 = 0, i3014 = 0, i3015 = 0, i3016 = 0, i3017 = 0, i3018 = 0, i3019 = 0;
+        int i3020 = 0, i3021 = 0, i3022 = 0, i3023 = 0, i3024 = 0, i3025 = 0, i3026 = 0, i3027 = 0, i3028 = 0, i3029 = 0;
+        int i3030 = 0, i3031 = 0, i3032 = 0, i3033 = 0, i3034 = 0, i3035 = 0, i3036 = 0, i3037 = 0, i3038 = 0, i3039 = 0;
+        int i3040 = 0, i3041 = 0, i3042 = 0, i3043 = 0, i3044 = 0, i3045 = 0, i3046 = 0, i3047 = 0, i3048 = 0, i3049 = 0;
+        int i3050 = 0, i3051 = 0, i3052 = 0, i3053 = 0, i3054 = 0, i3055 = 0, i3056 = 0, i3057 = 0, i3058 = 0, i3059 = 0;
+        int i3060 = 0, i3061 = 0, i3062 = 0, i3063 = 0, i3064 = 0, i3065 = 0, i3066 = 0, i3067 = 0, i3068 = 0, i3069 = 0;
+        int i3070 = 0, i3071 = 0, i3072 = 0, i3073 = 0, i3074 = 0, i3075 = 0, i3076 = 0, i3077 = 0, i3078 = 0, i3079 = 0;
+        int i3080 = 0, i3081 = 0, i3082 = 0, i3083 = 0, i3084 = 0, i3085 = 0, i3086 = 0, i3087 = 0, i3088 = 0, i3089 = 0;
+        int i3090 = 0, i3091 = 0, i3092 = 0, i3093 = 0, i3094 = 0, i3095 = 0, i3096 = 0, i3097 = 0, i3098 = 0, i3099 = 0;
+        int i3100 = 0, i3101 = 0, i3102 = 0, i3103 = 0, i3104 = 0, i3105 = 0, i3106 = 0, i3107 = 0, i3108 = 0, i3109 = 0;
+        int i3110 = 0, i3111 = 0, i3112 = 0, i3113 = 0, i3114 = 0, i3115 = 0, i3116 = 0, i3117 = 0, i3118 = 0, i3119 = 0;
+        int i3120 = 0, i3121 = 0, i3122 = 0, i3123 = 0, i3124 = 0, i3125 = 0, i3126 = 0, i3127 = 0, i3128 = 0, i3129 = 0;
+        int i3130 = 0, i3131 = 0, i3132 = 0, i3133 = 0, i3134 = 0, i3135 = 0, i3136 = 0, i3137 = 0, i3138 = 0, i3139 = 0;
+        int i3140 = 0, i3141 = 0, i3142 = 0, i3143 = 0, i3144 = 0, i3145 = 0, i3146 = 0, i3147 = 0, i3148 = 0, i3149 = 0;
+        int i3150 = 0, i3151 = 0, i3152 = 0, i3153 = 0, i3154 = 0, i3155 = 0, i3156 = 0, i3157 = 0, i3158 = 0, i3159 = 0;
+        int i3160 = 0, i3161 = 0, i3162 = 0, i3163 = 0, i3164 = 0, i3165 = 0, i3166 = 0, i3167 = 0, i3168 = 0, i3169 = 0;
+        int i3170 = 0, i3171 = 0, i3172 = 0, i3173 = 0, i3174 = 0, i3175 = 0, i3176 = 0, i3177 = 0, i3178 = 0, i3179 = 0;
+        int i3180 = 0, i3181 = 0, i3182 = 0, i3183 = 0, i3184 = 0, i3185 = 0, i3186 = 0, i3187 = 0, i3188 = 0, i3189 = 0;
+        int i3190 = 0, i3191 = 0, i3192 = 0, i3193 = 0, i3194 = 0, i3195 = 0, i3196 = 0, i3197 = 0, i3198 = 0, i3199 = 0;
+        int i3200 = 0, i3201 = 0, i3202 = 0, i3203 = 0, i3204 = 0, i3205 = 0, i3206 = 0, i3207 = 0, i3208 = 0, i3209 = 0;
+        int i3210 = 0, i3211 = 0, i3212 = 0, i3213 = 0, i3214 = 0, i3215 = 0, i3216 = 0, i3217 = 0, i3218 = 0, i3219 = 0;
+        int i3220 = 0, i3221 = 0, i3222 = 0, i3223 = 0, i3224 = 0, i3225 = 0, i3226 = 0, i3227 = 0, i3228 = 0, i3229 = 0;
+        int i3230 = 0, i3231 = 0, i3232 = 0, i3233 = 0, i3234 = 0, i3235 = 0, i3236 = 0, i3237 = 0, i3238 = 0, i3239 = 0;
+        int i3240 = 0, i3241 = 0, i3242 = 0, i3243 = 0, i3244 = 0, i3245 = 0, i3246 = 0, i3247 = 0, i3248 = 0, i3249 = 0;
+        int i3250 = 0, i3251 = 0, i3252 = 0, i3253 = 0, i3254 = 0, i3255 = 0, i3256 = 0, i3257 = 0, i3258 = 0, i3259 = 0;
+        int i3260 = 0, i3261 = 0, i3262 = 0, i3263 = 0, i3264 = 0, i3265 = 0, i3266 = 0, i3267 = 0, i3268 = 0, i3269 = 0;
+        int i3270 = 0, i3271 = 0, i3272 = 0, i3273 = 0, i3274 = 0, i3275 = 0, i3276 = 0, i3277 = 0, i3278 = 0, i3279 = 0;
+        int i3280 = 0, i3281 = 0, i3282 = 0, i3283 = 0, i3284 = 0, i3285 = 0, i3286 = 0, i3287 = 0, i3288 = 0, i3289 = 0;
+        int i3290 = 0, i3291 = 0, i3292 = 0, i3293 = 0, i3294 = 0, i3295 = 0, i3296 = 0, i3297 = 0, i3298 = 0, i3299 = 0;
+        int i3300 = 0, i3301 = 0, i3302 = 0, i3303 = 0, i3304 = 0, i3305 = 0, i3306 = 0, i3307 = 0, i3308 = 0, i3309 = 0;
+        int i3310 = 0, i3311 = 0, i3312 = 0, i3313 = 0, i3314 = 0, i3315 = 0, i3316 = 0, i3317 = 0, i3318 = 0, i3319 = 0;
+        int i3320 = 0, i3321 = 0, i3322 = 0, i3323 = 0, i3324 = 0, i3325 = 0, i3326 = 0, i3327 = 0, i3328 = 0, i3329 = 0;
+        int i3330 = 0, i3331 = 0, i3332 = 0, i3333 = 0, i3334 = 0, i3335 = 0, i3336 = 0, i3337 = 0, i3338 = 0, i3339 = 0;
+        int i3340 = 0, i3341 = 0, i3342 = 0, i3343 = 0, i3344 = 0, i3345 = 0, i3346 = 0, i3347 = 0, i3348 = 0, i3349 = 0;
+        int i3350 = 0, i3351 = 0, i3352 = 0, i3353 = 0, i3354 = 0, i3355 = 0, i3356 = 0, i3357 = 0, i3358 = 0, i3359 = 0;
+        int i3360 = 0, i3361 = 0, i3362 = 0, i3363 = 0, i3364 = 0, i3365 = 0, i3366 = 0, i3367 = 0, i3368 = 0, i3369 = 0;
+        int i3370 = 0, i3371 = 0, i3372 = 0, i3373 = 0, i3374 = 0, i3375 = 0, i3376 = 0, i3377 = 0, i3378 = 0, i3379 = 0;
+        int i3380 = 0, i3381 = 0, i3382 = 0, i3383 = 0, i3384 = 0, i3385 = 0, i3386 = 0, i3387 = 0, i3388 = 0, i3389 = 0;
+        int i3390 = 0, i3391 = 0, i3392 = 0, i3393 = 0, i3394 = 0, i3395 = 0, i3396 = 0, i3397 = 0, i3398 = 0, i3399 = 0;
+        int i3400 = 0, i3401 = 0, i3402 = 0, i3403 = 0, i3404 = 0, i3405 = 0, i3406 = 0, i3407 = 0, i3408 = 0, i3409 = 0;
+        int i3410 = 0, i3411 = 0, i3412 = 0, i3413 = 0, i3414 = 0, i3415 = 0, i3416 = 0, i3417 = 0, i3418 = 0, i3419 = 0;
+        int i3420 = 0, i3421 = 0, i3422 = 0, i3423 = 0, i3424 = 0, i3425 = 0, i3426 = 0, i3427 = 0, i3428 = 0, i3429 = 0;
+        int i3430 = 0, i3431 = 0, i3432 = 0, i3433 = 0, i3434 = 0, i3435 = 0, i3436 = 0, i3437 = 0, i3438 = 0, i3439 = 0;
+        int i3440 = 0, i3441 = 0, i3442 = 0, i3443 = 0, i3444 = 0, i3445 = 0, i3446 = 0, i3447 = 0, i3448 = 0, i3449 = 0;
+        int i3450 = 0, i3451 = 0, i3452 = 0, i3453 = 0, i3454 = 0, i3455 = 0, i3456 = 0, i3457 = 0, i3458 = 0, i3459 = 0;
+        int i3460 = 0, i3461 = 0, i3462 = 0, i3463 = 0, i3464 = 0, i3465 = 0, i3466 = 0, i3467 = 0, i3468 = 0, i3469 = 0;
+        int i3470 = 0, i3471 = 0, i3472 = 0, i3473 = 0, i3474 = 0, i3475 = 0, i3476 = 0, i3477 = 0, i3478 = 0, i3479 = 0;
+        int i3480 = 0, i3481 = 0, i3482 = 0, i3483 = 0, i3484 = 0, i3485 = 0, i3486 = 0, i3487 = 0, i3488 = 0, i3489 = 0;
+        int i3490 = 0, i3491 = 0, i3492 = 0, i3493 = 0, i3494 = 0, i3495 = 0, i3496 = 0, i3497 = 0, i3498 = 0, i3499 = 0;
+        int i3500 = 0, i3501 = 0, i3502 = 0, i3503 = 0, i3504 = 0, i3505 = 0, i3506 = 0, i3507 = 0, i3508 = 0, i3509 = 0;
+        int i3510 = 0, i3511 = 0, i3512 = 0, i3513 = 0, i3514 = 0, i3515 = 0, i3516 = 0, i3517 = 0, i3518 = 0, i3519 = 0;
+        int i3520 = 0, i3521 = 0, i3522 = 0, i3523 = 0, i3524 = 0, i3525 = 0, i3526 = 0, i3527 = 0, i3528 = 0, i3529 = 0;
+        int i3530 = 0, i3531 = 0, i3532 = 0, i3533 = 0, i3534 = 0, i3535 = 0, i3536 = 0, i3537 = 0, i3538 = 0, i3539 = 0;
+        int i3540 = 0, i3541 = 0, i3542 = 0, i3543 = 0, i3544 = 0, i3545 = 0, i3546 = 0, i3547 = 0, i3548 = 0, i3549 = 0;
+        int i3550 = 0, i3551 = 0, i3552 = 0, i3553 = 0, i3554 = 0, i3555 = 0, i3556 = 0, i3557 = 0, i3558 = 0, i3559 = 0;
+        int i3560 = 0, i3561 = 0, i3562 = 0, i3563 = 0, i3564 = 0, i3565 = 0, i3566 = 0, i3567 = 0, i3568 = 0, i3569 = 0;
+        int i3570 = 0, i3571 = 0, i3572 = 0, i3573 = 0, i3574 = 0, i3575 = 0, i3576 = 0, i3577 = 0, i3578 = 0, i3579 = 0;
+        int i3580 = 0, i3581 = 0, i3582 = 0, i3583 = 0, i3584 = 0, i3585 = 0, i3586 = 0, i3587 = 0, i3588 = 0, i3589 = 0;
+        int i3590 = 0, i3591 = 0, i3592 = 0, i3593 = 0, i3594 = 0, i3595 = 0, i3596 = 0, i3597 = 0, i3598 = 0, i3599 = 0;
+        int i3600 = 0, i3601 = 0, i3602 = 0, i3603 = 0, i3604 = 0, i3605 = 0, i3606 = 0, i3607 = 0, i3608 = 0, i3609 = 0;
+        int i3610 = 0, i3611 = 0, i3612 = 0, i3613 = 0, i3614 = 0, i3615 = 0, i3616 = 0, i3617 = 0, i3618 = 0, i3619 = 0;
+        int i3620 = 0, i3621 = 0, i3622 = 0, i3623 = 0, i3624 = 0, i3625 = 0, i3626 = 0, i3627 = 0, i3628 = 0, i3629 = 0;
+        int i3630 = 0, i3631 = 0, i3632 = 0, i3633 = 0, i3634 = 0, i3635 = 0, i3636 = 0, i3637 = 0, i3638 = 0, i3639 = 0;
+        int i3640 = 0, i3641 = 0, i3642 = 0, i3643 = 0, i3644 = 0, i3645 = 0, i3646 = 0, i3647 = 0, i3648 = 0, i3649 = 0;
+        int i3650 = 0, i3651 = 0, i3652 = 0, i3653 = 0, i3654 = 0, i3655 = 0, i3656 = 0, i3657 = 0, i3658 = 0, i3659 = 0;
+        int i3660 = 0, i3661 = 0, i3662 = 0, i3663 = 0, i3664 = 0, i3665 = 0, i3666 = 0, i3667 = 0, i3668 = 0, i3669 = 0;
+        int i3670 = 0, i3671 = 0, i3672 = 0, i3673 = 0, i3674 = 0, i3675 = 0, i3676 = 0, i3677 = 0, i3678 = 0, i3679 = 0;
+        int i3680 = 0, i3681 = 0, i3682 = 0, i3683 = 0, i3684 = 0, i3685 = 0, i3686 = 0, i3687 = 0, i3688 = 0, i3689 = 0;
+        int i3690 = 0, i3691 = 0, i3692 = 0, i3693 = 0, i3694 = 0, i3695 = 0, i3696 = 0, i3697 = 0, i3698 = 0, i3699 = 0;
+        int i3700 = 0, i3701 = 0, i3702 = 0, i3703 = 0, i3704 = 0, i3705 = 0, i3706 = 0, i3707 = 0, i3708 = 0, i3709 = 0;
+        int i3710 = 0, i3711 = 0, i3712 = 0, i3713 = 0, i3714 = 0, i3715 = 0, i3716 = 0, i3717 = 0, i3718 = 0, i3719 = 0;
+        int i3720 = 0, i3721 = 0, i3722 = 0, i3723 = 0, i3724 = 0, i3725 = 0, i3726 = 0, i3727 = 0, i3728 = 0, i3729 = 0;
+        int i3730 = 0, i3731 = 0, i3732 = 0, i3733 = 0, i3734 = 0, i3735 = 0, i3736 = 0, i3737 = 0, i3738 = 0, i3739 = 0;
+        int i3740 = 0, i3741 = 0, i3742 = 0, i3743 = 0, i3744 = 0, i3745 = 0, i3746 = 0, i3747 = 0, i3748 = 0, i3749 = 0;
+        int i3750 = 0, i3751 = 0, i3752 = 0, i3753 = 0, i3754 = 0, i3755 = 0, i3756 = 0, i3757 = 0, i3758 = 0, i3759 = 0;
+        int i3760 = 0, i3761 = 0, i3762 = 0, i3763 = 0, i3764 = 0, i3765 = 0, i3766 = 0, i3767 = 0, i3768 = 0, i3769 = 0;
+        int i3770 = 0, i3771 = 0, i3772 = 0, i3773 = 0, i3774 = 0, i3775 = 0, i3776 = 0, i3777 = 0, i3778 = 0, i3779 = 0;
+        int i3780 = 0, i3781 = 0, i3782 = 0, i3783 = 0, i3784 = 0, i3785 = 0, i3786 = 0, i3787 = 0, i3788 = 0, i3789 = 0;
+        int i3790 = 0, i3791 = 0, i3792 = 0, i3793 = 0, i3794 = 0, i3795 = 0, i3796 = 0, i3797 = 0, i3798 = 0, i3799 = 0;
+        int i3800 = 0, i3801 = 0, i3802 = 0, i3803 = 0, i3804 = 0, i3805 = 0, i3806 = 0, i3807 = 0, i3808 = 0, i3809 = 0;
+        int i3810 = 0, i3811 = 0, i3812 = 0, i3813 = 0, i3814 = 0, i3815 = 0, i3816 = 0, i3817 = 0, i3818 = 0, i3819 = 0;
+        int i3820 = 0, i3821 = 0, i3822 = 0, i3823 = 0, i3824 = 0, i3825 = 0, i3826 = 0, i3827 = 0, i3828 = 0, i3829 = 0;
+        int i3830 = 0, i3831 = 0, i3832 = 0, i3833 = 0, i3834 = 0, i3835 = 0, i3836 = 0, i3837 = 0, i3838 = 0, i3839 = 0;
+        int i3840 = 0, i3841 = 0, i3842 = 0, i3843 = 0, i3844 = 0, i3845 = 0, i3846 = 0, i3847 = 0, i3848 = 0, i3849 = 0;
+        int i3850 = 0, i3851 = 0, i3852 = 0, i3853 = 0, i3854 = 0, i3855 = 0, i3856 = 0, i3857 = 0, i3858 = 0, i3859 = 0;
+        int i3860 = 0, i3861 = 0, i3862 = 0, i3863 = 0, i3864 = 0, i3865 = 0, i3866 = 0, i3867 = 0, i3868 = 0, i3869 = 0;
+        int i3870 = 0, i3871 = 0, i3872 = 0, i3873 = 0, i3874 = 0, i3875 = 0, i3876 = 0, i3877 = 0, i3878 = 0, i3879 = 0;
+        int i3880 = 0, i3881 = 0, i3882 = 0, i3883 = 0, i3884 = 0, i3885 = 0, i3886 = 0, i3887 = 0, i3888 = 0, i3889 = 0;
+        int i3890 = 0, i3891 = 0, i3892 = 0, i3893 = 0, i3894 = 0, i3895 = 0, i3896 = 0, i3897 = 0, i3898 = 0, i3899 = 0;
+        int i3900 = 0, i3901 = 0, i3902 = 0, i3903 = 0, i3904 = 0, i3905 = 0, i3906 = 0, i3907 = 0, i3908 = 0, i3909 = 0;
+        int i3910 = 0, i3911 = 0, i3912 = 0, i3913 = 0, i3914 = 0, i3915 = 0, i3916 = 0, i3917 = 0, i3918 = 0, i3919 = 0;
+        int i3920 = 0, i3921 = 0, i3922 = 0, i3923 = 0, i3924 = 0, i3925 = 0, i3926 = 0, i3927 = 0, i3928 = 0, i3929 = 0;
+        int i3930 = 0, i3931 = 0, i3932 = 0, i3933 = 0, i3934 = 0, i3935 = 0, i3936 = 0, i3937 = 0, i3938 = 0, i3939 = 0;
+        int i3940 = 0, i3941 = 0, i3942 = 0, i3943 = 0, i3944 = 0, i3945 = 0, i3946 = 0, i3947 = 0, i3948 = 0, i3949 = 0;
+        int i3950 = 0, i3951 = 0, i3952 = 0, i3953 = 0, i3954 = 0, i3955 = 0, i3956 = 0, i3957 = 0, i3958 = 0, i3959 = 0;
+        int i3960 = 0, i3961 = 0, i3962 = 0, i3963 = 0, i3964 = 0, i3965 = 0, i3966 = 0, i3967 = 0, i3968 = 0, i3969 = 0;
+        int i3970 = 0, i3971 = 0, i3972 = 0, i3973 = 0, i3974 = 0, i3975 = 0, i3976 = 0, i3977 = 0, i3978 = 0, i3979 = 0;
+        int i3980 = 0, i3981 = 0, i3982 = 0, i3983 = 0, i3984 = 0, i3985 = 0, i3986 = 0, i3987 = 0, i3988 = 0, i3989 = 0;
+        int i3990 = 0, i3991 = 0, i3992 = 0, i3993 = 0, i3994 = 0, i3995 = 0, i3996 = 0, i3997 = 0, i3998 = 0, i3999 = 0;
+        int i4000 = 0, i4001 = 0, i4002 = 0, i4003 = 0, i4004 = 0, i4005 = 0, i4006 = 0, i4007 = 0, i4008 = 0, i4009 = 0;
+        int i4010 = 0, i4011 = 0, i4012 = 0, i4013 = 0, i4014 = 0, i4015 = 0, i4016 = 0, i4017 = 0, i4018 = 0, i4019 = 0;
+        int i4020 = 0, i4021 = 0, i4022 = 0, i4023 = 0, i4024 = 0, i4025 = 0, i4026 = 0, i4027 = 0, i4028 = 0, i4029 = 0;
+        int i4030 = 0, i4031 = 0, i4032 = 0, i4033 = 0, i4034 = 0, i4035 = 0, i4036 = 0, i4037 = 0, i4038 = 0, i4039 = 0;
+        int i4040 = 0, i4041 = 0, i4042 = 0, i4043 = 0, i4044 = 0, i4045 = 0, i4046 = 0, i4047 = 0, i4048 = 0, i4049 = 0;
+        int i4050 = 0, i4051 = 0, i4052 = 0, i4053 = 0, i4054 = 0, i4055 = 0, i4056 = 0, i4057 = 0, i4058 = 0, i4059 = 0;
+        int i4060 = 0, i4061 = 0, i4062 = 0, i4063 = 0, i4064 = 0, i4065 = 0, i4066 = 0, i4067 = 0, i4068 = 0, i4069 = 0;
+        int i4070 = 0, i4071 = 0, i4072 = 0, i4073 = 0, i4074 = 0, i4075 = 0, i4076 = 0, i4077 = 0, i4078 = 0, i4079 = 0;
+        int i4080 = 0, i4081 = 0, i4082 = 0, i4083 = 0, i4084 = 0, i4085 = 0, i4086 = 0, i4087 = 0, i4088 = 0, i4089 = 0;
+        int i4090 = 0, i4091 = 0, i4092 = 0, i4093 = 0, i4094 = 0, i4095 = 0, i4096 = 0, i4097 = 0, i4098 = 0, i4099 = 0;
+        int i4100 = 0, i4101 = 0, i4102 = 0, i4103 = 0, i4104 = 0, i4105 = 0, i4106 = 0, i4107 = 0, i4108 = 0, i4109 = 0;
+        int i4110 = 0, i4111 = 0, i4112 = 0, i4113 = 0, i4114 = 0, i4115 = 0, i4116 = 0, i4117 = 0, i4118 = 0, i4119 = 0;
+        int i4120 = 0, i4121 = 0, i4122 = 0, i4123 = 0, i4124 = 0, i4125 = 0, i4126 = 0, i4127 = 0, i4128 = 0, i4129 = 0;
+        int i4130 = 0, i4131 = 0, i4132 = 0, i4133 = 0, i4134 = 0, i4135 = 0, i4136 = 0, i4137 = 0, i4138 = 0, i4139 = 0;
+        int i4140 = 0, i4141 = 0, i4142 = 0, i4143 = 0, i4144 = 0, i4145 = 0, i4146 = 0, i4147 = 0, i4148 = 0, i4149 = 0;
+        int i4150 = 0, i4151 = 0, i4152 = 0, i4153 = 0, i4154 = 0, i4155 = 0, i4156 = 0, i4157 = 0, i4158 = 0, i4159 = 0;
+        int i4160 = 0, i4161 = 0, i4162 = 0, i4163 = 0, i4164 = 0, i4165 = 0, i4166 = 0, i4167 = 0, i4168 = 0, i4169 = 0;
+        int i4170 = 0, i4171 = 0, i4172 = 0, i4173 = 0, i4174 = 0, i4175 = 0, i4176 = 0, i4177 = 0, i4178 = 0, i4179 = 0;
+        int i4180 = 0, i4181 = 0, i4182 = 0, i4183 = 0, i4184 = 0, i4185 = 0, i4186 = 0, i4187 = 0, i4188 = 0, i4189 = 0;
+        int i4190 = 0, i4191 = 0, i4192 = 0, i4193 = 0, i4194 = 0, i4195 = 0, i4196 = 0, i4197 = 0, i4198 = 0, i4199 = 0;
+        int i4200 = 0, i4201 = 0, i4202 = 0, i4203 = 0, i4204 = 0, i4205 = 0, i4206 = 0, i4207 = 0, i4208 = 0, i4209 = 0;
+        int i4210 = 0, i4211 = 0, i4212 = 0, i4213 = 0, i4214 = 0, i4215 = 0, i4216 = 0, i4217 = 0, i4218 = 0, i4219 = 0;
+        int i4220 = 0, i4221 = 0, i4222 = 0, i4223 = 0, i4224 = 0, i4225 = 0, i4226 = 0, i4227 = 0, i4228 = 0, i4229 = 0;
+        int i4230 = 0, i4231 = 0, i4232 = 0, i4233 = 0, i4234 = 0, i4235 = 0, i4236 = 0, i4237 = 0, i4238 = 0, i4239 = 0;
+        int i4240 = 0, i4241 = 0, i4242 = 0, i4243 = 0, i4244 = 0, i4245 = 0, i4246 = 0, i4247 = 0, i4248 = 0, i4249 = 0;
+        int i4250 = 0, i4251 = 0, i4252 = 0, i4253 = 0, i4254 = 0, i4255 = 0, i4256 = 0, i4257 = 0, i4258 = 0, i4259 = 0;
+        int i4260 = 0, i4261 = 0, i4262 = 0, i4263 = 0, i4264 = 0, i4265 = 0, i4266 = 0, i4267 = 0, i4268 = 0, i4269 = 0;
+        int i4270 = 0, i4271 = 0, i4272 = 0, i4273 = 0, i4274 = 0, i4275 = 0, i4276 = 0, i4277 = 0, i4278 = 0, i4279 = 0;
+        int i4280 = 0, i4281 = 0, i4282 = 0, i4283 = 0, i4284 = 0, i4285 = 0, i4286 = 0, i4287 = 0, i4288 = 0, i4289 = 0;
+        int i4290 = 0, i4291 = 0, i4292 = 0, i4293 = 0, i4294 = 0, i4295 = 0, i4296 = 0, i4297 = 0, i4298 = 0, i4299 = 0;
+        int i4300 = 0, i4301 = 0, i4302 = 0, i4303 = 0, i4304 = 0, i4305 = 0, i4306 = 0, i4307 = 0, i4308 = 0, i4309 = 0;
+        int i4310 = 0, i4311 = 0, i4312 = 0, i4313 = 0, i4314 = 0, i4315 = 0, i4316 = 0, i4317 = 0, i4318 = 0, i4319 = 0;
+        int i4320 = 0, i4321 = 0, i4322 = 0, i4323 = 0, i4324 = 0, i4325 = 0, i4326 = 0, i4327 = 0, i4328 = 0, i4329 = 0;
+        int i4330 = 0, i4331 = 0, i4332 = 0, i4333 = 0, i4334 = 0, i4335 = 0, i4336 = 0, i4337 = 0, i4338 = 0, i4339 = 0;
+        int i4340 = 0, i4341 = 0, i4342 = 0, i4343 = 0, i4344 = 0, i4345 = 0, i4346 = 0, i4347 = 0, i4348 = 0, i4349 = 0;
+        int i4350 = 0, i4351 = 0, i4352 = 0, i4353 = 0, i4354 = 0, i4355 = 0, i4356 = 0, i4357 = 0, i4358 = 0, i4359 = 0;
+        int i4360 = 0, i4361 = 0, i4362 = 0, i4363 = 0, i4364 = 0, i4365 = 0, i4366 = 0, i4367 = 0, i4368 = 0, i4369 = 0;
+        int i4370 = 0, i4371 = 0, i4372 = 0, i4373 = 0, i4374 = 0, i4375 = 0, i4376 = 0, i4377 = 0, i4378 = 0, i4379 = 0;
+        int i4380 = 0, i4381 = 0, i4382 = 0, i4383 = 0, i4384 = 0, i4385 = 0, i4386 = 0, i4387 = 0, i4388 = 0, i4389 = 0;
+        int i4390 = 0, i4391 = 0, i4392 = 0, i4393 = 0, i4394 = 0, i4395 = 0, i4396 = 0, i4397 = 0, i4398 = 0, i4399 = 0;
+        int i4400 = 0, i4401 = 0, i4402 = 0, i4403 = 0, i4404 = 0, i4405 = 0, i4406 = 0, i4407 = 0, i4408 = 0, i4409 = 0;
+        int i4410 = 0, i4411 = 0, i4412 = 0, i4413 = 0, i4414 = 0, i4415 = 0, i4416 = 0, i4417 = 0, i4418 = 0, i4419 = 0;
+        int i4420 = 0, i4421 = 0, i4422 = 0, i4423 = 0, i4424 = 0, i4425 = 0, i4426 = 0, i4427 = 0, i4428 = 0, i4429 = 0;
+        int i4430 = 0, i4431 = 0, i4432 = 0, i4433 = 0, i4434 = 0, i4435 = 0, i4436 = 0, i4437 = 0, i4438 = 0, i4439 = 0;
+        int i4440 = 0, i4441 = 0, i4442 = 0, i4443 = 0, i4444 = 0, i4445 = 0, i4446 = 0, i4447 = 0, i4448 = 0, i4449 = 0;
+        int i4450 = 0, i4451 = 0, i4452 = 0, i4453 = 0, i4454 = 0, i4455 = 0, i4456 = 0, i4457 = 0, i4458 = 0, i4459 = 0;
+        int i4460 = 0, i4461 = 0, i4462 = 0, i4463 = 0, i4464 = 0, i4465 = 0, i4466 = 0, i4467 = 0, i4468 = 0, i4469 = 0;
+        int i4470 = 0, i4471 = 0, i4472 = 0, i4473 = 0, i4474 = 0, i4475 = 0, i4476 = 0, i4477 = 0, i4478 = 0, i4479 = 0;
+        int i4480 = 0, i4481 = 0, i4482 = 0, i4483 = 0, i4484 = 0, i4485 = 0, i4486 = 0, i4487 = 0, i4488 = 0, i4489 = 0;
+        int i4490 = 0, i4491 = 0, i4492 = 0, i4493 = 0, i4494 = 0, i4495 = 0, i4496 = 0, i4497 = 0, i4498 = 0, i4499 = 0;
+        int i4500 = 0, i4501 = 0, i4502 = 0, i4503 = 0, i4504 = 0, i4505 = 0, i4506 = 0, i4507 = 0, i4508 = 0, i4509 = 0;
+        int i4510 = 0, i4511 = 0, i4512 = 0, i4513 = 0, i4514 = 0, i4515 = 0, i4516 = 0, i4517 = 0, i4518 = 0, i4519 = 0;
+        int i4520 = 0, i4521 = 0, i4522 = 0, i4523 = 0, i4524 = 0, i4525 = 0, i4526 = 0, i4527 = 0, i4528 = 0, i4529 = 0;
+        int i4530 = 0, i4531 = 0, i4532 = 0, i4533 = 0, i4534 = 0, i4535 = 0, i4536 = 0, i4537 = 0, i4538 = 0, i4539 = 0;
+        int i4540 = 0, i4541 = 0, i4542 = 0, i4543 = 0, i4544 = 0, i4545 = 0, i4546 = 0, i4547 = 0, i4548 = 0, i4549 = 0;
+        int i4550 = 0, i4551 = 0, i4552 = 0, i4553 = 0, i4554 = 0, i4555 = 0, i4556 = 0, i4557 = 0, i4558 = 0, i4559 = 0;
+        int i4560 = 0, i4561 = 0, i4562 = 0, i4563 = 0, i4564 = 0, i4565 = 0, i4566 = 0, i4567 = 0, i4568 = 0, i4569 = 0;
+        int i4570 = 0, i4571 = 0, i4572 = 0, i4573 = 0, i4574 = 0, i4575 = 0, i4576 = 0, i4577 = 0, i4578 = 0, i4579 = 0;
+        int i4580 = 0, i4581 = 0, i4582 = 0, i4583 = 0, i4584 = 0, i4585 = 0, i4586 = 0, i4587 = 0, i4588 = 0, i4589 = 0;
+        int i4590 = 0, i4591 = 0, i4592 = 0, i4593 = 0, i4594 = 0, i4595 = 0, i4596 = 0, i4597 = 0, i4598 = 0, i4599 = 0;
+        int i4600 = 0, i4601 = 0, i4602 = 0, i4603 = 0, i4604 = 0, i4605 = 0, i4606 = 0, i4607 = 0, i4608 = 0, i4609 = 0;
+        int i4610 = 0, i4611 = 0, i4612 = 0, i4613 = 0, i4614 = 0, i4615 = 0, i4616 = 0, i4617 = 0, i4618 = 0, i4619 = 0;
+        int i4620 = 0, i4621 = 0, i4622 = 0, i4623 = 0, i4624 = 0, i4625 = 0, i4626 = 0, i4627 = 0, i4628 = 0, i4629 = 0;
+        int i4630 = 0, i4631 = 0, i4632 = 0, i4633 = 0, i4634 = 0, i4635 = 0, i4636 = 0, i4637 = 0, i4638 = 0, i4639 = 0;
+        int i4640 = 0, i4641 = 0, i4642 = 0, i4643 = 0, i4644 = 0, i4645 = 0, i4646 = 0, i4647 = 0, i4648 = 0, i4649 = 0;
+        int i4650 = 0, i4651 = 0, i4652 = 0, i4653 = 0, i4654 = 0, i4655 = 0, i4656 = 0, i4657 = 0, i4658 = 0, i4659 = 0;
+        int i4660 = 0, i4661 = 0, i4662 = 0, i4663 = 0, i4664 = 0, i4665 = 0, i4666 = 0, i4667 = 0, i4668 = 0, i4669 = 0;
+        int i4670 = 0, i4671 = 0, i4672 = 0, i4673 = 0, i4674 = 0, i4675 = 0, i4676 = 0, i4677 = 0, i4678 = 0, i4679 = 0;
+        int i4680 = 0, i4681 = 0, i4682 = 0, i4683 = 0, i4684 = 0, i4685 = 0, i4686 = 0, i4687 = 0, i4688 = 0, i4689 = 0;
+        int i4690 = 0, i4691 = 0, i4692 = 0, i4693 = 0, i4694 = 0, i4695 = 0, i4696 = 0, i4697 = 0, i4698 = 0, i4699 = 0;
+        int i4700 = 0, i4701 = 0, i4702 = 0, i4703 = 0, i4704 = 0, i4705 = 0, i4706 = 0, i4707 = 0, i4708 = 0, i4709 = 0;
+        int i4710 = 0, i4711 = 0, i4712 = 0, i4713 = 0, i4714 = 0, i4715 = 0, i4716 = 0, i4717 = 0, i4718 = 0, i4719 = 0;
+        int i4720 = 0, i4721 = 0, i4722 = 0, i4723 = 0, i4724 = 0, i4725 = 0, i4726 = 0, i4727 = 0, i4728 = 0, i4729 = 0;
+        int i4730 = 0, i4731 = 0, i4732 = 0, i4733 = 0, i4734 = 0, i4735 = 0, i4736 = 0, i4737 = 0, i4738 = 0, i4739 = 0;
+        int i4740 = 0, i4741 = 0, i4742 = 0, i4743 = 0, i4744 = 0, i4745 = 0, i4746 = 0, i4747 = 0, i4748 = 0, i4749 = 0;
+        int i4750 = 0, i4751 = 0, i4752 = 0, i4753 = 0, i4754 = 0, i4755 = 0, i4756 = 0, i4757 = 0, i4758 = 0, i4759 = 0;
+        int i4760 = 0, i4761 = 0, i4762 = 0, i4763 = 0, i4764 = 0, i4765 = 0, i4766 = 0, i4767 = 0, i4768 = 0, i4769 = 0;
+        int i4770 = 0, i4771 = 0, i4772 = 0, i4773 = 0, i4774 = 0, i4775 = 0, i4776 = 0, i4777 = 0, i4778 = 0, i4779 = 0;
+        int i4780 = 0, i4781 = 0, i4782 = 0, i4783 = 0, i4784 = 0, i4785 = 0, i4786 = 0, i4787 = 0, i4788 = 0, i4789 = 0;
+        int i4790 = 0, i4791 = 0, i4792 = 0, i4793 = 0, i4794 = 0, i4795 = 0, i4796 = 0, i4797 = 0, i4798 = 0, i4799 = 0;
+        int i4800 = 0, i4801 = 0, i4802 = 0, i4803 = 0, i4804 = 0, i4805 = 0, i4806 = 0, i4807 = 0, i4808 = 0, i4809 = 0;
+        int i4810 = 0, i4811 = 0, i4812 = 0, i4813 = 0, i4814 = 0, i4815 = 0, i4816 = 0, i4817 = 0, i4818 = 0, i4819 = 0;
+        int i4820 = 0, i4821 = 0, i4822 = 0, i4823 = 0, i4824 = 0, i4825 = 0, i4826 = 0, i4827 = 0, i4828 = 0, i4829 = 0;
+        int i4830 = 0, i4831 = 0, i4832 = 0, i4833 = 0, i4834 = 0, i4835 = 0, i4836 = 0, i4837 = 0, i4838 = 0, i4839 = 0;
+        int i4840 = 0, i4841 = 0, i4842 = 0, i4843 = 0, i4844 = 0, i4845 = 0, i4846 = 0, i4847 = 0, i4848 = 0, i4849 = 0;
+        int i4850 = 0, i4851 = 0, i4852 = 0, i4853 = 0, i4854 = 0, i4855 = 0, i4856 = 0, i4857 = 0, i4858 = 0, i4859 = 0;
+        int i4860 = 0, i4861 = 0, i4862 = 0, i4863 = 0, i4864 = 0, i4865 = 0, i4866 = 0, i4867 = 0, i4868 = 0, i4869 = 0;
+        int i4870 = 0, i4871 = 0, i4872 = 0, i4873 = 0, i4874 = 0, i4875 = 0, i4876 = 0, i4877 = 0, i4878 = 0, i4879 = 0;
+        int i4880 = 0, i4881 = 0, i4882 = 0, i4883 = 0, i4884 = 0, i4885 = 0, i4886 = 0, i4887 = 0, i4888 = 0, i4889 = 0;
+        int i4890 = 0, i4891 = 0, i4892 = 0, i4893 = 0, i4894 = 0, i4895 = 0, i4896 = 0, i4897 = 0, i4898 = 0, i4899 = 0;
+        int i4900 = 0, i4901 = 0, i4902 = 0, i4903 = 0, i4904 = 0, i4905 = 0, i4906 = 0, i4907 = 0, i4908 = 0, i4909 = 0;
+        int i4910 = 0, i4911 = 0, i4912 = 0, i4913 = 0, i4914 = 0, i4915 = 0, i4916 = 0, i4917 = 0, i4918 = 0, i4919 = 0;
+        int i4920 = 0, i4921 = 0, i4922 = 0, i4923 = 0, i4924 = 0, i4925 = 0, i4926 = 0, i4927 = 0, i4928 = 0, i4929 = 0;
+        int i4930 = 0, i4931 = 0, i4932 = 0, i4933 = 0, i4934 = 0, i4935 = 0, i4936 = 0, i4937 = 0, i4938 = 0, i4939 = 0;
+        int i4940 = 0, i4941 = 0, i4942 = 0, i4943 = 0, i4944 = 0, i4945 = 0, i4946 = 0, i4947 = 0, i4948 = 0, i4949 = 0;
+        int i4950 = 0, i4951 = 0, i4952 = 0, i4953 = 0, i4954 = 0, i4955 = 0, i4956 = 0, i4957 = 0, i4958 = 0, i4959 = 0;
+        int i4960 = 0, i4961 = 0, i4962 = 0, i4963 = 0, i4964 = 0, i4965 = 0, i4966 = 0, i4967 = 0, i4968 = 0, i4969 = 0;
+        int i4970 = 0, i4971 = 0, i4972 = 0, i4973 = 0, i4974 = 0, i4975 = 0, i4976 = 0, i4977 = 0, i4978 = 0, i4979 = 0;
+        int i4980 = 0, i4981 = 0, i4982 = 0, i4983 = 0, i4984 = 0, i4985 = 0, i4986 = 0, i4987 = 0, i4988 = 0, i4989 = 0;
+        int i4990 = 0, i4991 = 0, i4992 = 0, i4993 = 0, i4994 = 0, i4995 = 0, i4996 = 0, i4997 = 0, i4998 = 0, i4999 = 0;
+        int i5000 = 0, i5001 = 0, i5002 = 0, i5003 = 0, i5004 = 0, i5005 = 0, i5006 = 0, i5007 = 0, i5008 = 0, i5009 = 0;
+        int i5010 = 0, i5011 = 0, i5012 = 0, i5013 = 0, i5014 = 0, i5015 = 0, i5016 = 0, i5017 = 0, i5018 = 0, i5019 = 0;
+        int i5020 = 0, i5021 = 0, i5022 = 0, i5023 = 0, i5024 = 0, i5025 = 0, i5026 = 0, i5027 = 0, i5028 = 0, i5029 = 0;
+        int i5030 = 0, i5031 = 0, i5032 = 0, i5033 = 0, i5034 = 0, i5035 = 0, i5036 = 0, i5037 = 0, i5038 = 0, i5039 = 0;
+        int i5040 = 0, i5041 = 0, i5042 = 0, i5043 = 0, i5044 = 0, i5045 = 0, i5046 = 0, i5047 = 0, i5048 = 0, i5049 = 0;
+        int i5050 = 0, i5051 = 0, i5052 = 0, i5053 = 0, i5054 = 0, i5055 = 0, i5056 = 0, i5057 = 0, i5058 = 0, i5059 = 0;
+        int i5060 = 0, i5061 = 0, i5062 = 0, i5063 = 0, i5064 = 0, i5065 = 0, i5066 = 0, i5067 = 0, i5068 = 0, i5069 = 0;
+        int i5070 = 0, i5071 = 0, i5072 = 0, i5073 = 0, i5074 = 0, i5075 = 0, i5076 = 0, i5077 = 0, i5078 = 0, i5079 = 0;
+        int i5080 = 0, i5081 = 0, i5082 = 0, i5083 = 0, i5084 = 0, i5085 = 0, i5086 = 0, i5087 = 0, i5088 = 0, i5089 = 0;
+        int i5090 = 0, i5091 = 0, i5092 = 0, i5093 = 0, i5094 = 0, i5095 = 0, i5096 = 0, i5097 = 0, i5098 = 0, i5099 = 0;
+        int i5100 = 0, i5101 = 0, i5102 = 0, i5103 = 0, i5104 = 0, i5105 = 0, i5106 = 0, i5107 = 0, i5108 = 0, i5109 = 0;
+        int i5110 = 0, i5111 = 0, i5112 = 0, i5113 = 0, i5114 = 0, i5115 = 0, i5116 = 0, i5117 = 0, i5118 = 0, i5119 = 0;
+        int i5120 = 0, i5121 = 0, i5122 = 0, i5123 = 0, i5124 = 0, i5125 = 0, i5126 = 0, i5127 = 0, i5128 = 0, i5129 = 0;
+        int i5130 = 0, i5131 = 0, i5132 = 0, i5133 = 0, i5134 = 0, i5135 = 0, i5136 = 0, i5137 = 0, i5138 = 0, i5139 = 0;
+        int i5140 = 0, i5141 = 0, i5142 = 0, i5143 = 0, i5144 = 0, i5145 = 0, i5146 = 0, i5147 = 0, i5148 = 0, i5149 = 0;
+        int i5150 = 0, i5151 = 0, i5152 = 0, i5153 = 0, i5154 = 0, i5155 = 0, i5156 = 0, i5157 = 0, i5158 = 0, i5159 = 0;
+        int i5160 = 0, i5161 = 0, i5162 = 0, i5163 = 0, i5164 = 0, i5165 = 0, i5166 = 0, i5167 = 0, i5168 = 0, i5169 = 0;
+        int i5170 = 0, i5171 = 0, i5172 = 0, i5173 = 0, i5174 = 0, i5175 = 0, i5176 = 0, i5177 = 0, i5178 = 0, i5179 = 0;
+        int i5180 = 0, i5181 = 0, i5182 = 0, i5183 = 0, i5184 = 0, i5185 = 0, i5186 = 0, i5187 = 0, i5188 = 0, i5189 = 0;
+        int i5190 = 0, i5191 = 0, i5192 = 0, i5193 = 0, i5194 = 0, i5195 = 0, i5196 = 0, i5197 = 0, i5198 = 0, i5199 = 0;
+        int i5200 = 0, i5201 = 0, i5202 = 0, i5203 = 0, i5204 = 0, i5205 = 0, i5206 = 0, i5207 = 0, i5208 = 0, i5209 = 0;
+        int i5210 = 0, i5211 = 0, i5212 = 0, i5213 = 0, i5214 = 0, i5215 = 0, i5216 = 0, i5217 = 0, i5218 = 0, i5219 = 0;
+        int i5220 = 0, i5221 = 0, i5222 = 0, i5223 = 0, i5224 = 0, i5225 = 0, i5226 = 0, i5227 = 0, i5228 = 0, i5229 = 0;
+        int i5230 = 0, i5231 = 0, i5232 = 0, i5233 = 0, i5234 = 0, i5235 = 0, i5236 = 0, i5237 = 0, i5238 = 0, i5239 = 0;
+        int i5240 = 0, i5241 = 0, i5242 = 0, i5243 = 0, i5244 = 0, i5245 = 0, i5246 = 0, i5247 = 0, i5248 = 0, i5249 = 0;
+        int i5250 = 0, i5251 = 0, i5252 = 0, i5253 = 0, i5254 = 0, i5255 = 0, i5256 = 0, i5257 = 0, i5258 = 0, i5259 = 0;
+        int i5260 = 0, i5261 = 0, i5262 = 0, i5263 = 0, i5264 = 0, i5265 = 0, i5266 = 0, i5267 = 0, i5268 = 0, i5269 = 0;
+        int i5270 = 0, i5271 = 0, i5272 = 0, i5273 = 0, i5274 = 0, i5275 = 0, i5276 = 0, i5277 = 0, i5278 = 0, i5279 = 0;
+        int i5280 = 0, i5281 = 0, i5282 = 0, i5283 = 0, i5284 = 0, i5285 = 0, i5286 = 0, i5287 = 0, i5288 = 0, i5289 = 0;
+        int i5290 = 0, i5291 = 0, i5292 = 0, i5293 = 0, i5294 = 0, i5295 = 0, i5296 = 0, i5297 = 0, i5298 = 0, i5299 = 0;
+        int i5300 = 0, i5301 = 0, i5302 = 0, i5303 = 0, i5304 = 0, i5305 = 0, i5306 = 0, i5307 = 0, i5308 = 0, i5309 = 0;
+        int i5310 = 0, i5311 = 0, i5312 = 0, i5313 = 0, i5314 = 0, i5315 = 0, i5316 = 0, i5317 = 0, i5318 = 0, i5319 = 0;
+        int i5320 = 0, i5321 = 0, i5322 = 0, i5323 = 0, i5324 = 0, i5325 = 0, i5326 = 0, i5327 = 0, i5328 = 0, i5329 = 0;
+        int i5330 = 0, i5331 = 0, i5332 = 0, i5333 = 0, i5334 = 0, i5335 = 0, i5336 = 0, i5337 = 0, i5338 = 0, i5339 = 0;
+        int i5340 = 0, i5341 = 0, i5342 = 0, i5343 = 0, i5344 = 0, i5345 = 0, i5346 = 0, i5347 = 0, i5348 = 0, i5349 = 0;
+        int i5350 = 0, i5351 = 0, i5352 = 0, i5353 = 0, i5354 = 0, i5355 = 0, i5356 = 0, i5357 = 0, i5358 = 0, i5359 = 0;
+        int i5360 = 0, i5361 = 0, i5362 = 0, i5363 = 0, i5364 = 0, i5365 = 0, i5366 = 0, i5367 = 0, i5368 = 0, i5369 = 0;
+        int i5370 = 0, i5371 = 0, i5372 = 0, i5373 = 0, i5374 = 0, i5375 = 0, i5376 = 0, i5377 = 0, i5378 = 0, i5379 = 0;
+        int i5380 = 0, i5381 = 0, i5382 = 0, i5383 = 0, i5384 = 0, i5385 = 0, i5386 = 0, i5387 = 0, i5388 = 0, i5389 = 0;
+        int i5390 = 0, i5391 = 0, i5392 = 0, i5393 = 0, i5394 = 0, i5395 = 0, i5396 = 0, i5397 = 0, i5398 = 0, i5399 = 0;
+        int i5400 = 0, i5401 = 0, i5402 = 0, i5403 = 0, i5404 = 0, i5405 = 0, i5406 = 0, i5407 = 0, i5408 = 0, i5409 = 0;
+        int i5410 = 0, i5411 = 0, i5412 = 0, i5413 = 0, i5414 = 0, i5415 = 0, i5416 = 0, i5417 = 0, i5418 = 0, i5419 = 0;
+        int i5420 = 0, i5421 = 0, i5422 = 0, i5423 = 0, i5424 = 0, i5425 = 0, i5426 = 0, i5427 = 0, i5428 = 0, i5429 = 0;
+        int i5430 = 0, i5431 = 0, i5432 = 0, i5433 = 0, i5434 = 0, i5435 = 0, i5436 = 0, i5437 = 0, i5438 = 0, i5439 = 0;
+        int i5440 = 0, i5441 = 0, i5442 = 0, i5443 = 0, i5444 = 0, i5445 = 0, i5446 = 0, i5447 = 0, i5448 = 0, i5449 = 0;
+        int i5450 = 0, i5451 = 0, i5452 = 0, i5453 = 0, i5454 = 0, i5455 = 0, i5456 = 0, i5457 = 0, i5458 = 0, i5459 = 0;
+        int i5460 = 0, i5461 = 0, i5462 = 0, i5463 = 0, i5464 = 0, i5465 = 0, i5466 = 0, i5467 = 0, i5468 = 0, i5469 = 0;
+        int i5470 = 0, i5471 = 0, i5472 = 0, i5473 = 0, i5474 = 0, i5475 = 0, i5476 = 0, i5477 = 0, i5478 = 0, i5479 = 0;
+        int i5480 = 0, i5481 = 0, i5482 = 0, i5483 = 0, i5484 = 0, i5485 = 0, i5486 = 0, i5487 = 0, i5488 = 0, i5489 = 0;
+        int i5490 = 0, i5491 = 0, i5492 = 0, i5493 = 0, i5494 = 0, i5495 = 0, i5496 = 0, i5497 = 0, i5498 = 0, i5499 = 0;
+        int i5500 = 0, i5501 = 0, i5502 = 0, i5503 = 0, i5504 = 0, i5505 = 0, i5506 = 0, i5507 = 0, i5508 = 0, i5509 = 0;
+        int i5510 = 0, i5511 = 0, i5512 = 0, i5513 = 0, i5514 = 0, i5515 = 0, i5516 = 0, i5517 = 0, i5518 = 0, i5519 = 0;
+        int i5520 = 0, i5521 = 0, i5522 = 0, i5523 = 0, i5524 = 0, i5525 = 0, i5526 = 0, i5527 = 0, i5528 = 0, i5529 = 0;
+        int i5530 = 0, i5531 = 0, i5532 = 0, i5533 = 0, i5534 = 0, i5535 = 0, i5536 = 0, i5537 = 0, i5538 = 0, i5539 = 0;
+        int i5540 = 0, i5541 = 0, i5542 = 0, i5543 = 0, i5544 = 0, i5545 = 0, i5546 = 0, i5547 = 0, i5548 = 0, i5549 = 0;
+        int i5550 = 0, i5551 = 0, i5552 = 0, i5553 = 0, i5554 = 0, i5555 = 0, i5556 = 0, i5557 = 0, i5558 = 0, i5559 = 0;
+        int i5560 = 0, i5561 = 0, i5562 = 0, i5563 = 0, i5564 = 0, i5565 = 0, i5566 = 0, i5567 = 0, i5568 = 0, i5569 = 0;
+        int i5570 = 0, i5571 = 0, i5572 = 0, i5573 = 0, i5574 = 0, i5575 = 0, i5576 = 0, i5577 = 0, i5578 = 0, i5579 = 0;
+        int i5580 = 0, i5581 = 0, i5582 = 0, i5583 = 0, i5584 = 0, i5585 = 0, i5586 = 0, i5587 = 0, i5588 = 0, i5589 = 0;
+        int i5590 = 0, i5591 = 0, i5592 = 0, i5593 = 0, i5594 = 0, i5595 = 0, i5596 = 0, i5597 = 0, i5598 = 0, i5599 = 0;
+        int i5600 = 0, i5601 = 0, i5602 = 0, i5603 = 0, i5604 = 0, i5605 = 0, i5606 = 0, i5607 = 0, i5608 = 0, i5609 = 0;
+        int i5610 = 0, i5611 = 0, i5612 = 0, i5613 = 0, i5614 = 0, i5615 = 0, i5616 = 0, i5617 = 0, i5618 = 0, i5619 = 0;
+        int i5620 = 0, i5621 = 0, i5622 = 0, i5623 = 0, i5624 = 0, i5625 = 0, i5626 = 0, i5627 = 0, i5628 = 0, i5629 = 0;
+        int i5630 = 0, i5631 = 0, i5632 = 0, i5633 = 0, i5634 = 0, i5635 = 0, i5636 = 0, i5637 = 0, i5638 = 0, i5639 = 0;
+        int i5640 = 0, i5641 = 0, i5642 = 0, i5643 = 0, i5644 = 0, i5645 = 0, i5646 = 0, i5647 = 0, i5648 = 0, i5649 = 0;
+        int i5650 = 0, i5651 = 0, i5652 = 0, i5653 = 0, i5654 = 0, i5655 = 0, i5656 = 0, i5657 = 0, i5658 = 0, i5659 = 0;
+        int i5660 = 0, i5661 = 0, i5662 = 0, i5663 = 0, i5664 = 0, i5665 = 0, i5666 = 0, i5667 = 0, i5668 = 0, i5669 = 0;
+        int i5670 = 0, i5671 = 0, i5672 = 0, i5673 = 0, i5674 = 0, i5675 = 0, i5676 = 0, i5677 = 0, i5678 = 0, i5679 = 0;
+        int i5680 = 0, i5681 = 0, i5682 = 0, i5683 = 0, i5684 = 0, i5685 = 0, i5686 = 0, i5687 = 0, i5688 = 0, i5689 = 0;
+        int i5690 = 0, i5691 = 0, i5692 = 0, i5693 = 0, i5694 = 0, i5695 = 0, i5696 = 0, i5697 = 0, i5698 = 0, i5699 = 0;
+        int i5700 = 0, i5701 = 0, i5702 = 0, i5703 = 0, i5704 = 0, i5705 = 0, i5706 = 0, i5707 = 0, i5708 = 0, i5709 = 0;
+        int i5710 = 0, i5711 = 0, i5712 = 0, i5713 = 0, i5714 = 0, i5715 = 0, i5716 = 0, i5717 = 0, i5718 = 0, i5719 = 0;
+        int i5720 = 0, i5721 = 0, i5722 = 0, i5723 = 0, i5724 = 0, i5725 = 0, i5726 = 0, i5727 = 0, i5728 = 0, i5729 = 0;
+        int i5730 = 0, i5731 = 0, i5732 = 0, i5733 = 0, i5734 = 0, i5735 = 0, i5736 = 0, i5737 = 0, i5738 = 0, i5739 = 0;
+        int i5740 = 0, i5741 = 0, i5742 = 0, i5743 = 0, i5744 = 0, i5745 = 0, i5746 = 0, i5747 = 0, i5748 = 0, i5749 = 0;
+        int i5750 = 0, i5751 = 0, i5752 = 0, i5753 = 0, i5754 = 0, i5755 = 0, i5756 = 0, i5757 = 0, i5758 = 0, i5759 = 0;
+        int i5760 = 0, i5761 = 0, i5762 = 0, i5763 = 0, i5764 = 0, i5765 = 0, i5766 = 0, i5767 = 0, i5768 = 0, i5769 = 0;
+        int i5770 = 0, i5771 = 0, i5772 = 0, i5773 = 0, i5774 = 0, i5775 = 0, i5776 = 0, i5777 = 0, i5778 = 0, i5779 = 0;
+        int i5780 = 0, i5781 = 0, i5782 = 0, i5783 = 0, i5784 = 0, i5785 = 0, i5786 = 0, i5787 = 0, i5788 = 0, i5789 = 0;
+        int i5790 = 0, i5791 = 0, i5792 = 0, i5793 = 0, i5794 = 0, i5795 = 0, i5796 = 0, i5797 = 0, i5798 = 0, i5799 = 0;
+        int i5800 = 0, i5801 = 0, i5802 = 0, i5803 = 0, i5804 = 0, i5805 = 0, i5806 = 0, i5807 = 0, i5808 = 0, i5809 = 0;
+        int i5810 = 0, i5811 = 0, i5812 = 0, i5813 = 0, i5814 = 0, i5815 = 0, i5816 = 0, i5817 = 0, i5818 = 0, i5819 = 0;
+        int i5820 = 0, i5821 = 0, i5822 = 0, i5823 = 0, i5824 = 0, i5825 = 0, i5826 = 0, i5827 = 0, i5828 = 0, i5829 = 0;
+        int i5830 = 0, i5831 = 0, i5832 = 0, i5833 = 0, i5834 = 0, i5835 = 0, i5836 = 0, i5837 = 0, i5838 = 0, i5839 = 0;
+        int i5840 = 0, i5841 = 0, i5842 = 0, i5843 = 0, i5844 = 0, i5845 = 0, i5846 = 0, i5847 = 0, i5848 = 0, i5849 = 0;
+        int i5850 = 0, i5851 = 0, i5852 = 0, i5853 = 0, i5854 = 0, i5855 = 0, i5856 = 0, i5857 = 0, i5858 = 0, i5859 = 0;
+        int i5860 = 0, i5861 = 0, i5862 = 0, i5863 = 0, i5864 = 0, i5865 = 0, i5866 = 0, i5867 = 0, i5868 = 0, i5869 = 0;
+        int i5870 = 0, i5871 = 0, i5872 = 0, i5873 = 0, i5874 = 0, i5875 = 0, i5876 = 0, i5877 = 0, i5878 = 0, i5879 = 0;
+        int i5880 = 0, i5881 = 0, i5882 = 0, i5883 = 0, i5884 = 0, i5885 = 0, i5886 = 0, i5887 = 0, i5888 = 0, i5889 = 0;
+        int i5890 = 0, i5891 = 0, i5892 = 0, i5893 = 0, i5894 = 0, i5895 = 0, i5896 = 0, i5897 = 0, i5898 = 0, i5899 = 0;
+        int i5900 = 0, i5901 = 0, i5902 = 0, i5903 = 0, i5904 = 0, i5905 = 0, i5906 = 0, i5907 = 0, i5908 = 0, i5909 = 0;
+        int i5910 = 0, i5911 = 0, i5912 = 0, i5913 = 0, i5914 = 0, i5915 = 0, i5916 = 0, i5917 = 0, i5918 = 0, i5919 = 0;
+        int i5920 = 0, i5921 = 0, i5922 = 0, i5923 = 0, i5924 = 0, i5925 = 0, i5926 = 0, i5927 = 0, i5928 = 0, i5929 = 0;
+        int i5930 = 0, i5931 = 0, i5932 = 0, i5933 = 0, i5934 = 0, i5935 = 0, i5936 = 0, i5937 = 0, i5938 = 0, i5939 = 0;
+        int i5940 = 0, i5941 = 0, i5942 = 0, i5943 = 0, i5944 = 0, i5945 = 0, i5946 = 0, i5947 = 0, i5948 = 0, i5949 = 0;
+        int i5950 = 0, i5951 = 0, i5952 = 0, i5953 = 0, i5954 = 0, i5955 = 0, i5956 = 0, i5957 = 0, i5958 = 0, i5959 = 0;
+        int i5960 = 0, i5961 = 0, i5962 = 0, i5963 = 0, i5964 = 0, i5965 = 0, i5966 = 0, i5967 = 0, i5968 = 0, i5969 = 0;
+        int i5970 = 0, i5971 = 0, i5972 = 0, i5973 = 0, i5974 = 0, i5975 = 0, i5976 = 0, i5977 = 0, i5978 = 0, i5979 = 0;
+        int i5980 = 0, i5981 = 0, i5982 = 0, i5983 = 0, i5984 = 0, i5985 = 0, i5986 = 0, i5987 = 0, i5988 = 0, i5989 = 0;
+        int i5990 = 0, i5991 = 0, i5992 = 0, i5993 = 0, i5994 = 0, i5995 = 0, i5996 = 0, i5997 = 0, i5998 = 0, i5999 = 0;
+        int i6000 = 0, i6001 = 0, i6002 = 0, i6003 = 0, i6004 = 0, i6005 = 0, i6006 = 0, i6007 = 0, i6008 = 0, i6009 = 0;
+        int i6010 = 0, i6011 = 0, i6012 = 0, i6013 = 0, i6014 = 0, i6015 = 0, i6016 = 0, i6017 = 0, i6018 = 0, i6019 = 0;
+        int i6020 = 0, i6021 = 0, i6022 = 0, i6023 = 0, i6024 = 0, i6025 = 0, i6026 = 0, i6027 = 0, i6028 = 0, i6029 = 0;
+        int i6030 = 0, i6031 = 0, i6032 = 0, i6033 = 0, i6034 = 0, i6035 = 0, i6036 = 0, i6037 = 0, i6038 = 0, i6039 = 0;
+        int i6040 = 0, i6041 = 0, i6042 = 0, i6043 = 0, i6044 = 0, i6045 = 0, i6046 = 0, i6047 = 0, i6048 = 0, i6049 = 0;
+        int i6050 = 0, i6051 = 0, i6052 = 0, i6053 = 0, i6054 = 0, i6055 = 0, i6056 = 0, i6057 = 0, i6058 = 0, i6059 = 0;
+        int i6060 = 0, i6061 = 0, i6062 = 0, i6063 = 0, i6064 = 0, i6065 = 0, i6066 = 0, i6067 = 0, i6068 = 0, i6069 = 0;
+        int i6070 = 0, i6071 = 0, i6072 = 0, i6073 = 0, i6074 = 0, i6075 = 0, i6076 = 0, i6077 = 0, i6078 = 0, i6079 = 0;
+        int i6080 = 0, i6081 = 0, i6082 = 0, i6083 = 0, i6084 = 0, i6085 = 0, i6086 = 0, i6087 = 0, i6088 = 0, i6089 = 0;
+        int i6090 = 0, i6091 = 0, i6092 = 0, i6093 = 0, i6094 = 0, i6095 = 0, i6096 = 0, i6097 = 0, i6098 = 0, i6099 = 0;
+        int i6100 = 0, i6101 = 0, i6102 = 0, i6103 = 0, i6104 = 0, i6105 = 0, i6106 = 0, i6107 = 0, i6108 = 0, i6109 = 0;
+        int i6110 = 0, i6111 = 0, i6112 = 0, i6113 = 0, i6114 = 0, i6115 = 0, i6116 = 0, i6117 = 0, i6118 = 0, i6119 = 0;
+        int i6120 = 0, i6121 = 0, i6122 = 0, i6123 = 0, i6124 = 0, i6125 = 0, i6126 = 0, i6127 = 0, i6128 = 0, i6129 = 0;
+        int i6130 = 0, i6131 = 0, i6132 = 0, i6133 = 0, i6134 = 0, i6135 = 0, i6136 = 0, i6137 = 0, i6138 = 0, i6139 = 0;
+        int i6140 = 0, i6141 = 0, i6142 = 0, i6143 = 0, i6144 = 0, i6145 = 0, i6146 = 0, i6147 = 0, i6148 = 0, i6149 = 0;
+        int i6150 = 0, i6151 = 0, i6152 = 0, i6153 = 0, i6154 = 0, i6155 = 0, i6156 = 0, i6157 = 0, i6158 = 0, i6159 = 0;
+        int i6160 = 0, i6161 = 0, i6162 = 0, i6163 = 0, i6164 = 0, i6165 = 0, i6166 = 0, i6167 = 0, i6168 = 0, i6169 = 0;
+        int i6170 = 0, i6171 = 0, i6172 = 0, i6173 = 0, i6174 = 0, i6175 = 0, i6176 = 0, i6177 = 0, i6178 = 0, i6179 = 0;
+        int i6180 = 0, i6181 = 0, i6182 = 0, i6183 = 0, i6184 = 0, i6185 = 0, i6186 = 0, i6187 = 0, i6188 = 0, i6189 = 0;
+        int i6190 = 0, i6191 = 0, i6192 = 0, i6193 = 0, i6194 = 0, i6195 = 0, i6196 = 0, i6197 = 0, i6198 = 0, i6199 = 0;
+        int i6200 = 0, i6201 = 0, i6202 = 0, i6203 = 0, i6204 = 0, i6205 = 0, i6206 = 0, i6207 = 0, i6208 = 0, i6209 = 0;
+        int i6210 = 0, i6211 = 0, i6212 = 0, i6213 = 0, i6214 = 0, i6215 = 0, i6216 = 0, i6217 = 0, i6218 = 0, i6219 = 0;
+        int i6220 = 0, i6221 = 0, i6222 = 0, i6223 = 0, i6224 = 0, i6225 = 0, i6226 = 0, i6227 = 0, i6228 = 0, i6229 = 0;
+        int i6230 = 0, i6231 = 0, i6232 = 0, i6233 = 0, i6234 = 0, i6235 = 0, i6236 = 0, i6237 = 0, i6238 = 0, i6239 = 0;
+        int i6240 = 0, i6241 = 0, i6242 = 0, i6243 = 0, i6244 = 0, i6245 = 0, i6246 = 0, i6247 = 0, i6248 = 0, i6249 = 0;
+        int i6250 = 0, i6251 = 0, i6252 = 0, i6253 = 0, i6254 = 0, i6255 = 0, i6256 = 0, i6257 = 0, i6258 = 0, i6259 = 0;
+        int i6260 = 0, i6261 = 0, i6262 = 0, i6263 = 0, i6264 = 0, i6265 = 0, i6266 = 0, i6267 = 0, i6268 = 0, i6269 = 0;
+        int i6270 = 0, i6271 = 0, i6272 = 0, i6273 = 0, i6274 = 0, i6275 = 0, i6276 = 0, i6277 = 0, i6278 = 0, i6279 = 0;
+        int i6280 = 0, i6281 = 0, i6282 = 0, i6283 = 0, i6284 = 0, i6285 = 0, i6286 = 0, i6287 = 0, i6288 = 0, i6289 = 0;
+        int i6290 = 0, i6291 = 0, i6292 = 0, i6293 = 0, i6294 = 0, i6295 = 0, i6296 = 0, i6297 = 0, i6298 = 0, i6299 = 0;
+        int i6300 = 0, i6301 = 0, i6302 = 0, i6303 = 0, i6304 = 0, i6305 = 0, i6306 = 0, i6307 = 0, i6308 = 0, i6309 = 0;
+        int i6310 = 0, i6311 = 0, i6312 = 0, i6313 = 0, i6314 = 0, i6315 = 0, i6316 = 0, i6317 = 0, i6318 = 0, i6319 = 0;
+        int i6320 = 0, i6321 = 0, i6322 = 0, i6323 = 0, i6324 = 0, i6325 = 0, i6326 = 0, i6327 = 0, i6328 = 0, i6329 = 0;
+        int i6330 = 0, i6331 = 0, i6332 = 0, i6333 = 0, i6334 = 0, i6335 = 0, i6336 = 0, i6337 = 0, i6338 = 0, i6339 = 0;
+        int i6340 = 0, i6341 = 0, i6342 = 0, i6343 = 0, i6344 = 0, i6345 = 0, i6346 = 0, i6347 = 0, i6348 = 0, i6349 = 0;
+        int i6350 = 0, i6351 = 0, i6352 = 0, i6353 = 0, i6354 = 0, i6355 = 0, i6356 = 0, i6357 = 0, i6358 = 0, i6359 = 0;
+        int i6360 = 0, i6361 = 0, i6362 = 0, i6363 = 0, i6364 = 0, i6365 = 0, i6366 = 0, i6367 = 0, i6368 = 0, i6369 = 0;
+        int i6370 = 0, i6371 = 0, i6372 = 0, i6373 = 0, i6374 = 0, i6375 = 0, i6376 = 0, i6377 = 0, i6378 = 0, i6379 = 0;
+        int i6380 = 0, i6381 = 0, i6382 = 0, i6383 = 0, i6384 = 0, i6385 = 0, i6386 = 0, i6387 = 0, i6388 = 0, i6389 = 0;
+        int i6390 = 0, i6391 = 0, i6392 = 0, i6393 = 0, i6394 = 0, i6395 = 0, i6396 = 0, i6397 = 0, i6398 = 0, i6399 = 0;
+        int i6400 = 0, i6401 = 0, i6402 = 0, i6403 = 0, i6404 = 0, i6405 = 0, i6406 = 0, i6407 = 0, i6408 = 0, i6409 = 0;
+        int i6410 = 0, i6411 = 0, i6412 = 0, i6413 = 0, i6414 = 0, i6415 = 0, i6416 = 0, i6417 = 0, i6418 = 0, i6419 = 0;
+        int i6420 = 0, i6421 = 0, i6422 = 0, i6423 = 0, i6424 = 0, i6425 = 0, i6426 = 0, i6427 = 0, i6428 = 0, i6429 = 0;
+        int i6430 = 0, i6431 = 0, i6432 = 0, i6433 = 0, i6434 = 0, i6435 = 0, i6436 = 0, i6437 = 0, i6438 = 0, i6439 = 0;
+        int i6440 = 0, i6441 = 0, i6442 = 0, i6443 = 0, i6444 = 0, i6445 = 0, i6446 = 0, i6447 = 0, i6448 = 0, i6449 = 0;
+        int i6450 = 0, i6451 = 0, i6452 = 0, i6453 = 0, i6454 = 0, i6455 = 0, i6456 = 0, i6457 = 0, i6458 = 0, i6459 = 0;
+        int i6460 = 0, i6461 = 0, i6462 = 0, i6463 = 0, i6464 = 0, i6465 = 0, i6466 = 0, i6467 = 0, i6468 = 0, i6469 = 0;
+        int i6470 = 0, i6471 = 0, i6472 = 0, i6473 = 0, i6474 = 0, i6475 = 0, i6476 = 0, i6477 = 0, i6478 = 0, i6479 = 0;
+        int i6480 = 0, i6481 = 0, i6482 = 0, i6483 = 0, i6484 = 0, i6485 = 0, i6486 = 0, i6487 = 0, i6488 = 0, i6489 = 0;
+        int i6490 = 0, i6491 = 0, i6492 = 0, i6493 = 0, i6494 = 0, i6495 = 0, i6496 = 0, i6497 = 0, i6498 = 0, i6499 = 0;
+        int i6500 = 0, i6501 = 0, i6502 = 0, i6503 = 0, i6504 = 0, i6505 = 0, i6506 = 0, i6507 = 0, i6508 = 0, i6509 = 0;
+        int i6510 = 0, i6511 = 0, i6512 = 0, i6513 = 0, i6514 = 0, i6515 = 0, i6516 = 0, i6517 = 0, i6518 = 0, i6519 = 0;
+        int i6520 = 0, i6521 = 0, i6522 = 0, i6523 = 0, i6524 = 0, i6525 = 0, i6526 = 0, i6527 = 0, i6528 = 0, i6529 = 0;
+        int i6530 = 0, i6531 = 0, i6532 = 0, i6533 = 0, i6534 = 0, i6535 = 0, i6536 = 0, i6537 = 0, i6538 = 0, i6539 = 0;
+        int i6540 = 0, i6541 = 0, i6542 = 0, i6543 = 0, i6544 = 0, i6545 = 0, i6546 = 0, i6547 = 0, i6548 = 0, i6549 = 0;
+        int i6550 = 0, i6551 = 0, i6552 = 0, i6553 = 0, i6554 = 0, i6555 = 0, i6556 = 0, i6557 = 0, i6558 = 0, i6559 = 0;
+        int i6560 = 0, i6561 = 0, i6562 = 0, i6563 = 0, i6564 = 0, i6565 = 0, i6566 = 0, i6567 = 0, i6568 = 0, i6569 = 0;
+        int i6570 = 0, i6571 = 0, i6572 = 0, i6573 = 0, i6574 = 0, i6575 = 0, i6576 = 0, i6577 = 0, i6578 = 0, i6579 = 0;
+        int i6580 = 0, i6581 = 0, i6582 = 0, i6583 = 0, i6584 = 0, i6585 = 0, i6586 = 0, i6587 = 0, i6588 = 0, i6589 = 0;
+        int i6590 = 0, i6591 = 0, i6592 = 0, i6593 = 0, i6594 = 0, i6595 = 0, i6596 = 0, i6597 = 0, i6598 = 0, i6599 = 0;
+        int i6600 = 0, i6601 = 0, i6602 = 0, i6603 = 0, i6604 = 0, i6605 = 0, i6606 = 0, i6607 = 0, i6608 = 0, i6609 = 0;
+        int i6610 = 0, i6611 = 0, i6612 = 0, i6613 = 0, i6614 = 0, i6615 = 0, i6616 = 0, i6617 = 0, i6618 = 0, i6619 = 0;
+        int i6620 = 0, i6621 = 0, i6622 = 0, i6623 = 0, i6624 = 0, i6625 = 0, i6626 = 0, i6627 = 0, i6628 = 0, i6629 = 0;
+        int i6630 = 0, i6631 = 0, i6632 = 0, i6633 = 0, i6634 = 0, i6635 = 0, i6636 = 0, i6637 = 0, i6638 = 0, i6639 = 0;
+        int i6640 = 0, i6641 = 0, i6642 = 0, i6643 = 0, i6644 = 0, i6645 = 0, i6646 = 0, i6647 = 0, i6648 = 0, i6649 = 0;
+        int i6650 = 0, i6651 = 0, i6652 = 0, i6653 = 0, i6654 = 0, i6655 = 0, i6656 = 0, i6657 = 0, i6658 = 0, i6659 = 0;
+        int i6660 = 0, i6661 = 0, i6662 = 0, i6663 = 0, i6664 = 0, i6665 = 0, i6666 = 0, i6667 = 0, i6668 = 0, i6669 = 0;
+        int i6670 = 0, i6671 = 0, i6672 = 0, i6673 = 0, i6674 = 0, i6675 = 0, i6676 = 0, i6677 = 0, i6678 = 0, i6679 = 0;
+        int i6680 = 0, i6681 = 0, i6682 = 0, i6683 = 0, i6684 = 0, i6685 = 0, i6686 = 0, i6687 = 0, i6688 = 0, i6689 = 0;
+        int i6690 = 0, i6691 = 0, i6692 = 0, i6693 = 0, i6694 = 0, i6695 = 0, i6696 = 0, i6697 = 0, i6698 = 0, i6699 = 0;
+        int i6700 = 0, i6701 = 0, i6702 = 0, i6703 = 0, i6704 = 0, i6705 = 0, i6706 = 0, i6707 = 0, i6708 = 0, i6709 = 0;
+        int i6710 = 0, i6711 = 0, i6712 = 0, i6713 = 0, i6714 = 0, i6715 = 0, i6716 = 0, i6717 = 0, i6718 = 0, i6719 = 0;
+        int i6720 = 0, i6721 = 0, i6722 = 0, i6723 = 0, i6724 = 0, i6725 = 0, i6726 = 0, i6727 = 0, i6728 = 0, i6729 = 0;
+        int i6730 = 0, i6731 = 0, i6732 = 0, i6733 = 0, i6734 = 0, i6735 = 0, i6736 = 0, i6737 = 0, i6738 = 0, i6739 = 0;
+        int i6740 = 0, i6741 = 0, i6742 = 0, i6743 = 0, i6744 = 0, i6745 = 0, i6746 = 0, i6747 = 0, i6748 = 0, i6749 = 0;
+        int i6750 = 0, i6751 = 0, i6752 = 0, i6753 = 0, i6754 = 0, i6755 = 0, i6756 = 0, i6757 = 0, i6758 = 0, i6759 = 0;
+        int i6760 = 0, i6761 = 0, i6762 = 0, i6763 = 0, i6764 = 0, i6765 = 0, i6766 = 0, i6767 = 0, i6768 = 0, i6769 = 0;
+        int i6770 = 0, i6771 = 0, i6772 = 0, i6773 = 0, i6774 = 0, i6775 = 0, i6776 = 0, i6777 = 0, i6778 = 0, i6779 = 0;
+        int i6780 = 0, i6781 = 0, i6782 = 0, i6783 = 0, i6784 = 0, i6785 = 0, i6786 = 0, i6787 = 0, i6788 = 0, i6789 = 0;
+        int i6790 = 0, i6791 = 0, i6792 = 0, i6793 = 0, i6794 = 0, i6795 = 0, i6796 = 0, i6797 = 0, i6798 = 0, i6799 = 0;
+        int i6800 = 0, i6801 = 0, i6802 = 0, i6803 = 0, i6804 = 0, i6805 = 0, i6806 = 0, i6807 = 0, i6808 = 0, i6809 = 0;
+        int i6810 = 0, i6811 = 0, i6812 = 0, i6813 = 0, i6814 = 0, i6815 = 0, i6816 = 0, i6817 = 0, i6818 = 0, i6819 = 0;
+        int i6820 = 0, i6821 = 0, i6822 = 0, i6823 = 0, i6824 = 0, i6825 = 0, i6826 = 0, i6827 = 0, i6828 = 0, i6829 = 0;
+        int i6830 = 0, i6831 = 0, i6832 = 0, i6833 = 0, i6834 = 0, i6835 = 0, i6836 = 0, i6837 = 0, i6838 = 0, i6839 = 0;
+        int i6840 = 0, i6841 = 0, i6842 = 0, i6843 = 0, i6844 = 0, i6845 = 0, i6846 = 0, i6847 = 0, i6848 = 0, i6849 = 0;
+        int i6850 = 0, i6851 = 0, i6852 = 0, i6853 = 0, i6854 = 0, i6855 = 0, i6856 = 0, i6857 = 0, i6858 = 0, i6859 = 0;
+        int i6860 = 0, i6861 = 0, i6862 = 0, i6863 = 0, i6864 = 0, i6865 = 0, i6866 = 0, i6867 = 0, i6868 = 0, i6869 = 0;
+        int i6870 = 0, i6871 = 0, i6872 = 0, i6873 = 0, i6874 = 0, i6875 = 0, i6876 = 0, i6877 = 0, i6878 = 0, i6879 = 0;
+        int i6880 = 0, i6881 = 0, i6882 = 0, i6883 = 0, i6884 = 0, i6885 = 0, i6886 = 0, i6887 = 0, i6888 = 0, i6889 = 0;
+        int i6890 = 0, i6891 = 0, i6892 = 0, i6893 = 0, i6894 = 0, i6895 = 0, i6896 = 0, i6897 = 0, i6898 = 0, i6899 = 0;
+        int i6900 = 0, i6901 = 0, i6902 = 0, i6903 = 0, i6904 = 0, i6905 = 0, i6906 = 0, i6907 = 0, i6908 = 0, i6909 = 0;
+        int i6910 = 0, i6911 = 0, i6912 = 0, i6913 = 0, i6914 = 0, i6915 = 0, i6916 = 0, i6917 = 0, i6918 = 0, i6919 = 0;
+        int i6920 = 0, i6921 = 0, i6922 = 0, i6923 = 0, i6924 = 0, i6925 = 0, i6926 = 0, i6927 = 0, i6928 = 0, i6929 = 0;
+        int i6930 = 0, i6931 = 0, i6932 = 0, i6933 = 0, i6934 = 0, i6935 = 0, i6936 = 0, i6937 = 0, i6938 = 0, i6939 = 0;
+        int i6940 = 0, i6941 = 0, i6942 = 0, i6943 = 0, i6944 = 0, i6945 = 0, i6946 = 0, i6947 = 0, i6948 = 0, i6949 = 0;
+        int i6950 = 0, i6951 = 0, i6952 = 0, i6953 = 0, i6954 = 0, i6955 = 0, i6956 = 0, i6957 = 0, i6958 = 0, i6959 = 0;
+        int i6960 = 0, i6961 = 0, i6962 = 0, i6963 = 0, i6964 = 0, i6965 = 0, i6966 = 0, i6967 = 0, i6968 = 0, i6969 = 0;
+        int i6970 = 0, i6971 = 0, i6972 = 0, i6973 = 0, i6974 = 0, i6975 = 0, i6976 = 0, i6977 = 0, i6978 = 0, i6979 = 0;
+        int i6980 = 0, i6981 = 0, i6982 = 0, i6983 = 0, i6984 = 0, i6985 = 0, i6986 = 0, i6987 = 0, i6988 = 0, i6989 = 0;
+        int i6990 = 0, i6991 = 0, i6992 = 0, i6993 = 0, i6994 = 0, i6995 = 0, i6996 = 0, i6997 = 0, i6998 = 0, i6999 = 0;
+        int i7000 = 0, i7001 = 0, i7002 = 0, i7003 = 0, i7004 = 0, i7005 = 0, i7006 = 0, i7007 = 0, i7008 = 0, i7009 = 0;
+        int i7010 = 0, i7011 = 0, i7012 = 0, i7013 = 0, i7014 = 0, i7015 = 0, i7016 = 0, i7017 = 0, i7018 = 0, i7019 = 0;
+        int i7020 = 0, i7021 = 0, i7022 = 0, i7023 = 0, i7024 = 0, i7025 = 0, i7026 = 0, i7027 = 0, i7028 = 0, i7029 = 0;
+        int i7030 = 0, i7031 = 0, i7032 = 0, i7033 = 0, i7034 = 0, i7035 = 0, i7036 = 0, i7037 = 0, i7038 = 0, i7039 = 0;
+        int i7040 = 0, i7041 = 0, i7042 = 0, i7043 = 0, i7044 = 0, i7045 = 0, i7046 = 0, i7047 = 0, i7048 = 0, i7049 = 0;
+        int i7050 = 0, i7051 = 0, i7052 = 0, i7053 = 0, i7054 = 0, i7055 = 0, i7056 = 0, i7057 = 0, i7058 = 0, i7059 = 0;
+        int i7060 = 0, i7061 = 0, i7062 = 0, i7063 = 0, i7064 = 0, i7065 = 0, i7066 = 0, i7067 = 0, i7068 = 0, i7069 = 0;
+        int i7070 = 0, i7071 = 0, i7072 = 0, i7073 = 0, i7074 = 0, i7075 = 0, i7076 = 0, i7077 = 0, i7078 = 0, i7079 = 0;
+        int i7080 = 0, i7081 = 0, i7082 = 0, i7083 = 0, i7084 = 0, i7085 = 0, i7086 = 0, i7087 = 0, i7088 = 0, i7089 = 0;
+        int i7090 = 0, i7091 = 0, i7092 = 0, i7093 = 0, i7094 = 0, i7095 = 0, i7096 = 0, i7097 = 0, i7098 = 0, i7099 = 0;
+        int i7100 = 0, i7101 = 0, i7102 = 0, i7103 = 0, i7104 = 0, i7105 = 0, i7106 = 0, i7107 = 0, i7108 = 0, i7109 = 0;
+        int i7110 = 0, i7111 = 0, i7112 = 0, i7113 = 0, i7114 = 0, i7115 = 0, i7116 = 0, i7117 = 0, i7118 = 0, i7119 = 0;
+        int i7120 = 0, i7121 = 0, i7122 = 0, i7123 = 0, i7124 = 0, i7125 = 0, i7126 = 0, i7127 = 0, i7128 = 0, i7129 = 0;
+        int i7130 = 0, i7131 = 0, i7132 = 0, i7133 = 0, i7134 = 0, i7135 = 0, i7136 = 0, i7137 = 0, i7138 = 0, i7139 = 0;
+        int i7140 = 0, i7141 = 0, i7142 = 0, i7143 = 0, i7144 = 0, i7145 = 0, i7146 = 0, i7147 = 0, i7148 = 0, i7149 = 0;
+        int i7150 = 0, i7151 = 0, i7152 = 0, i7153 = 0, i7154 = 0, i7155 = 0, i7156 = 0, i7157 = 0, i7158 = 0, i7159 = 0;
+        int i7160 = 0, i7161 = 0, i7162 = 0, i7163 = 0, i7164 = 0, i7165 = 0, i7166 = 0, i7167 = 0, i7168 = 0, i7169 = 0;
+        int i7170 = 0, i7171 = 0, i7172 = 0, i7173 = 0, i7174 = 0, i7175 = 0, i7176 = 0, i7177 = 0, i7178 = 0, i7179 = 0;
+        int i7180 = 0, i7181 = 0, i7182 = 0, i7183 = 0, i7184 = 0, i7185 = 0, i7186 = 0, i7187 = 0, i7188 = 0, i7189 = 0;
+        int i7190 = 0, i7191 = 0, i7192 = 0, i7193 = 0, i7194 = 0, i7195 = 0, i7196 = 0, i7197 = 0, i7198 = 0, i7199 = 0;
+        int i7200 = 0, i7201 = 0, i7202 = 0, i7203 = 0, i7204 = 0, i7205 = 0, i7206 = 0, i7207 = 0, i7208 = 0, i7209 = 0;
+        int i7210 = 0, i7211 = 0, i7212 = 0, i7213 = 0, i7214 = 0, i7215 = 0, i7216 = 0, i7217 = 0, i7218 = 0, i7219 = 0;
+        int i7220 = 0, i7221 = 0, i7222 = 0, i7223 = 0, i7224 = 0, i7225 = 0, i7226 = 0, i7227 = 0, i7228 = 0, i7229 = 0;
+        int i7230 = 0, i7231 = 0, i7232 = 0, i7233 = 0, i7234 = 0, i7235 = 0, i7236 = 0, i7237 = 0, i7238 = 0, i7239 = 0;
+        int i7240 = 0, i7241 = 0, i7242 = 0, i7243 = 0, i7244 = 0, i7245 = 0, i7246 = 0, i7247 = 0, i7248 = 0, i7249 = 0;
+        int i7250 = 0, i7251 = 0, i7252 = 0, i7253 = 0, i7254 = 0, i7255 = 0, i7256 = 0, i7257 = 0, i7258 = 0, i7259 = 0;
+        int i7260 = 0, i7261 = 0, i7262 = 0, i7263 = 0, i7264 = 0, i7265 = 0, i7266 = 0, i7267 = 0, i7268 = 0, i7269 = 0;
+        int i7270 = 0, i7271 = 0, i7272 = 0, i7273 = 0, i7274 = 0, i7275 = 0, i7276 = 0, i7277 = 0, i7278 = 0, i7279 = 0;
+        int i7280 = 0, i7281 = 0, i7282 = 0, i7283 = 0, i7284 = 0, i7285 = 0, i7286 = 0, i7287 = 0, i7288 = 0, i7289 = 0;
+        int i7290 = 0, i7291 = 0, i7292 = 0, i7293 = 0, i7294 = 0, i7295 = 0, i7296 = 0, i7297 = 0, i7298 = 0, i7299 = 0;
+        int i7300 = 0, i7301 = 0, i7302 = 0, i7303 = 0, i7304 = 0, i7305 = 0, i7306 = 0, i7307 = 0, i7308 = 0, i7309 = 0;
+        int i7310 = 0, i7311 = 0, i7312 = 0, i7313 = 0, i7314 = 0, i7315 = 0, i7316 = 0, i7317 = 0, i7318 = 0, i7319 = 0;
+        int i7320 = 0, i7321 = 0, i7322 = 0, i7323 = 0, i7324 = 0, i7325 = 0, i7326 = 0, i7327 = 0, i7328 = 0, i7329 = 0;
+        int i7330 = 0, i7331 = 0, i7332 = 0, i7333 = 0, i7334 = 0, i7335 = 0, i7336 = 0, i7337 = 0, i7338 = 0, i7339 = 0;
+        int i7340 = 0, i7341 = 0, i7342 = 0, i7343 = 0, i7344 = 0, i7345 = 0, i7346 = 0, i7347 = 0, i7348 = 0, i7349 = 0;
+        int i7350 = 0, i7351 = 0, i7352 = 0, i7353 = 0, i7354 = 0, i7355 = 0, i7356 = 0, i7357 = 0, i7358 = 0, i7359 = 0;
+        int i7360 = 0, i7361 = 0, i7362 = 0, i7363 = 0, i7364 = 0, i7365 = 0, i7366 = 0, i7367 = 0, i7368 = 0, i7369 = 0;
+        int i7370 = 0, i7371 = 0, i7372 = 0, i7373 = 0, i7374 = 0, i7375 = 0, i7376 = 0, i7377 = 0, i7378 = 0, i7379 = 0;
+        int i7380 = 0, i7381 = 0, i7382 = 0, i7383 = 0, i7384 = 0, i7385 = 0, i7386 = 0, i7387 = 0, i7388 = 0, i7389 = 0;
+        int i7390 = 0, i7391 = 0, i7392 = 0, i7393 = 0, i7394 = 0, i7395 = 0, i7396 = 0, i7397 = 0, i7398 = 0, i7399 = 0;
+        int i7400 = 0, i7401 = 0, i7402 = 0, i7403 = 0, i7404 = 0, i7405 = 0, i7406 = 0, i7407 = 0, i7408 = 0, i7409 = 0;
+        int i7410 = 0, i7411 = 0, i7412 = 0, i7413 = 0, i7414 = 0, i7415 = 0, i7416 = 0, i7417 = 0, i7418 = 0, i7419 = 0;
+        int i7420 = 0, i7421 = 0, i7422 = 0, i7423 = 0, i7424 = 0, i7425 = 0, i7426 = 0, i7427 = 0, i7428 = 0, i7429 = 0;
+        int i7430 = 0, i7431 = 0, i7432 = 0, i7433 = 0, i7434 = 0, i7435 = 0, i7436 = 0, i7437 = 0, i7438 = 0, i7439 = 0;
+        int i7440 = 0, i7441 = 0, i7442 = 0, i7443 = 0, i7444 = 0, i7445 = 0, i7446 = 0, i7447 = 0, i7448 = 0, i7449 = 0;
+        int i7450 = 0, i7451 = 0, i7452 = 0, i7453 = 0, i7454 = 0, i7455 = 0, i7456 = 0, i7457 = 0, i7458 = 0, i7459 = 0;
+        int i7460 = 0, i7461 = 0, i7462 = 0, i7463 = 0, i7464 = 0, i7465 = 0, i7466 = 0, i7467 = 0, i7468 = 0, i7469 = 0;
+        int i7470 = 0, i7471 = 0, i7472 = 0, i7473 = 0, i7474 = 0, i7475 = 0, i7476 = 0, i7477 = 0, i7478 = 0, i7479 = 0;
+        int i7480 = 0, i7481 = 0, i7482 = 0, i7483 = 0, i7484 = 0, i7485 = 0, i7486 = 0, i7487 = 0, i7488 = 0, i7489 = 0;
+        int i7490 = 0, i7491 = 0, i7492 = 0, i7493 = 0, i7494 = 0, i7495 = 0, i7496 = 0, i7497 = 0, i7498 = 0, i7499 = 0;
+        int i7500 = 0, i7501 = 0, i7502 = 0, i7503 = 0, i7504 = 0, i7505 = 0, i7506 = 0, i7507 = 0, i7508 = 0, i7509 = 0;
+        int i7510 = 0, i7511 = 0, i7512 = 0, i7513 = 0, i7514 = 0, i7515 = 0, i7516 = 0, i7517 = 0, i7518 = 0, i7519 = 0;
+        int i7520 = 0, i7521 = 0, i7522 = 0, i7523 = 0, i7524 = 0, i7525 = 0, i7526 = 0, i7527 = 0, i7528 = 0, i7529 = 0;
+        int i7530 = 0, i7531 = 0, i7532 = 0, i7533 = 0, i7534 = 0, i7535 = 0, i7536 = 0, i7537 = 0, i7538 = 0, i7539 = 0;
+        int i7540 = 0, i7541 = 0, i7542 = 0, i7543 = 0, i7544 = 0, i7545 = 0, i7546 = 0, i7547 = 0, i7548 = 0, i7549 = 0;
+        int i7550 = 0, i7551 = 0, i7552 = 0, i7553 = 0, i7554 = 0, i7555 = 0, i7556 = 0, i7557 = 0, i7558 = 0, i7559 = 0;
+        int i7560 = 0, i7561 = 0, i7562 = 0, i7563 = 0, i7564 = 0, i7565 = 0, i7566 = 0, i7567 = 0, i7568 = 0, i7569 = 0;
+        int i7570 = 0, i7571 = 0, i7572 = 0, i7573 = 0, i7574 = 0, i7575 = 0, i7576 = 0, i7577 = 0, i7578 = 0, i7579 = 0;
+        int i7580 = 0, i7581 = 0, i7582 = 0, i7583 = 0, i7584 = 0, i7585 = 0, i7586 = 0, i7587 = 0, i7588 = 0, i7589 = 0;
+        int i7590 = 0, i7591 = 0, i7592 = 0, i7593 = 0, i7594 = 0, i7595 = 0, i7596 = 0, i7597 = 0, i7598 = 0, i7599 = 0;
+        int i7600 = 0, i7601 = 0, i7602 = 0, i7603 = 0, i7604 = 0, i7605 = 0, i7606 = 0, i7607 = 0, i7608 = 0, i7609 = 0;
+        int i7610 = 0, i7611 = 0, i7612 = 0, i7613 = 0, i7614 = 0, i7615 = 0, i7616 = 0, i7617 = 0, i7618 = 0, i7619 = 0;
+        int i7620 = 0, i7621 = 0, i7622 = 0, i7623 = 0, i7624 = 0, i7625 = 0, i7626 = 0, i7627 = 0, i7628 = 0, i7629 = 0;
+        int i7630 = 0, i7631 = 0, i7632 = 0, i7633 = 0, i7634 = 0, i7635 = 0, i7636 = 0, i7637 = 0, i7638 = 0, i7639 = 0;
+        int i7640 = 0, i7641 = 0, i7642 = 0, i7643 = 0, i7644 = 0, i7645 = 0, i7646 = 0, i7647 = 0, i7648 = 0, i7649 = 0;
+        int i7650 = 0, i7651 = 0, i7652 = 0, i7653 = 0, i7654 = 0, i7655 = 0, i7656 = 0, i7657 = 0, i7658 = 0, i7659 = 0;
+        int i7660 = 0, i7661 = 0, i7662 = 0, i7663 = 0, i7664 = 0, i7665 = 0, i7666 = 0, i7667 = 0, i7668 = 0, i7669 = 0;
+        int i7670 = 0, i7671 = 0, i7672 = 0, i7673 = 0, i7674 = 0, i7675 = 0, i7676 = 0, i7677 = 0, i7678 = 0, i7679 = 0;
+        int i7680 = 0, i7681 = 0, i7682 = 0, i7683 = 0, i7684 = 0, i7685 = 0, i7686 = 0, i7687 = 0, i7688 = 0, i7689 = 0;
+        int i7690 = 0, i7691 = 0, i7692 = 0, i7693 = 0, i7694 = 0, i7695 = 0, i7696 = 0, i7697 = 0, i7698 = 0, i7699 = 0;
+        int i7700 = 0, i7701 = 0, i7702 = 0, i7703 = 0, i7704 = 0, i7705 = 0, i7706 = 0, i7707 = 0, i7708 = 0, i7709 = 0;
+        int i7710 = 0, i7711 = 0, i7712 = 0, i7713 = 0, i7714 = 0, i7715 = 0, i7716 = 0, i7717 = 0, i7718 = 0, i7719 = 0;
+        int i7720 = 0, i7721 = 0, i7722 = 0, i7723 = 0, i7724 = 0, i7725 = 0, i7726 = 0, i7727 = 0, i7728 = 0, i7729 = 0;
+        int i7730 = 0, i7731 = 0, i7732 = 0, i7733 = 0, i7734 = 0, i7735 = 0, i7736 = 0, i7737 = 0, i7738 = 0, i7739 = 0;
+        int i7740 = 0, i7741 = 0, i7742 = 0, i7743 = 0, i7744 = 0, i7745 = 0, i7746 = 0, i7747 = 0, i7748 = 0, i7749 = 0;
+        int i7750 = 0, i7751 = 0, i7752 = 0, i7753 = 0, i7754 = 0, i7755 = 0, i7756 = 0, i7757 = 0, i7758 = 0, i7759 = 0;
+        int i7760 = 0, i7761 = 0, i7762 = 0, i7763 = 0, i7764 = 0, i7765 = 0, i7766 = 0, i7767 = 0, i7768 = 0, i7769 = 0;
+        int i7770 = 0, i7771 = 0, i7772 = 0, i7773 = 0, i7774 = 0, i7775 = 0, i7776 = 0, i7777 = 0, i7778 = 0, i7779 = 0;
+        int i7780 = 0, i7781 = 0, i7782 = 0, i7783 = 0, i7784 = 0, i7785 = 0, i7786 = 0, i7787 = 0, i7788 = 0, i7789 = 0;
+        int i7790 = 0, i7791 = 0, i7792 = 0, i7793 = 0, i7794 = 0, i7795 = 0, i7796 = 0, i7797 = 0, i7798 = 0, i7799 = 0;
+        int i7800 = 0, i7801 = 0, i7802 = 0, i7803 = 0, i7804 = 0, i7805 = 0, i7806 = 0, i7807 = 0, i7808 = 0, i7809 = 0;
+        int i7810 = 0, i7811 = 0, i7812 = 0, i7813 = 0, i7814 = 0, i7815 = 0, i7816 = 0, i7817 = 0, i7818 = 0, i7819 = 0;
+        int i7820 = 0, i7821 = 0, i7822 = 0, i7823 = 0, i7824 = 0, i7825 = 0, i7826 = 0, i7827 = 0, i7828 = 0, i7829 = 0;
+        int i7830 = 0, i7831 = 0, i7832 = 0, i7833 = 0, i7834 = 0, i7835 = 0, i7836 = 0, i7837 = 0, i7838 = 0, i7839 = 0;
+        int i7840 = 0, i7841 = 0, i7842 = 0, i7843 = 0, i7844 = 0, i7845 = 0, i7846 = 0, i7847 = 0, i7848 = 0, i7849 = 0;
+        int i7850 = 0, i7851 = 0, i7852 = 0, i7853 = 0, i7854 = 0, i7855 = 0, i7856 = 0, i7857 = 0, i7858 = 0, i7859 = 0;
+        int i7860 = 0, i7861 = 0, i7862 = 0, i7863 = 0, i7864 = 0, i7865 = 0, i7866 = 0, i7867 = 0, i7868 = 0, i7869 = 0;
+        int i7870 = 0, i7871 = 0, i7872 = 0, i7873 = 0, i7874 = 0, i7875 = 0, i7876 = 0, i7877 = 0, i7878 = 0, i7879 = 0;
+        int i7880 = 0, i7881 = 0, i7882 = 0, i7883 = 0, i7884 = 0, i7885 = 0, i7886 = 0, i7887 = 0, i7888 = 0, i7889 = 0;
+        int i7890 = 0, i7891 = 0, i7892 = 0, i7893 = 0, i7894 = 0, i7895 = 0, i7896 = 0, i7897 = 0, i7898 = 0, i7899 = 0;
+        int i7900 = 0, i7901 = 0, i7902 = 0, i7903 = 0, i7904 = 0, i7905 = 0, i7906 = 0, i7907 = 0, i7908 = 0, i7909 = 0;
+        int i7910 = 0, i7911 = 0, i7912 = 0, i7913 = 0, i7914 = 0, i7915 = 0, i7916 = 0, i7917 = 0, i7918 = 0, i7919 = 0;
+        int i7920 = 0, i7921 = 0, i7922 = 0, i7923 = 0, i7924 = 0, i7925 = 0, i7926 = 0, i7927 = 0, i7928 = 0, i7929 = 0;
+        int i7930 = 0, i7931 = 0, i7932 = 0, i7933 = 0, i7934 = 0, i7935 = 0, i7936 = 0, i7937 = 0, i7938 = 0, i7939 = 0;
+        int i7940 = 0, i7941 = 0, i7942 = 0, i7943 = 0, i7944 = 0, i7945 = 0, i7946 = 0, i7947 = 0, i7948 = 0, i7949 = 0;
+        int i7950 = 0, i7951 = 0, i7952 = 0, i7953 = 0, i7954 = 0, i7955 = 0, i7956 = 0, i7957 = 0, i7958 = 0, i7959 = 0;
+        int i7960 = 0, i7961 = 0, i7962 = 0, i7963 = 0, i7964 = 0, i7965 = 0, i7966 = 0, i7967 = 0, i7968 = 0, i7969 = 0;
+        int i7970 = 0, i7971 = 0, i7972 = 0, i7973 = 0, i7974 = 0, i7975 = 0, i7976 = 0, i7977 = 0, i7978 = 0, i7979 = 0;
+        int i7980 = 0, i7981 = 0, i7982 = 0, i7983 = 0, i7984 = 0, i7985 = 0, i7986 = 0, i7987 = 0, i7988 = 0, i7989 = 0;
+        int i7990 = 0, i7991 = 0, i7992 = 0, i7993 = 0, i7994 = 0, i7995 = 0, i7996 = 0, i7997 = 0, i7998 = 0, i7999 = 0;
+        int i8000 = 0, i8001 = 0, i8002 = 0, i8003 = 0, i8004 = 0, i8005 = 0, i8006 = 0, i8007 = 0, i8008 = 0, i8009 = 0;
+        int i8010 = 0, i8011 = 0, i8012 = 0, i8013 = 0, i8014 = 0, i8015 = 0, i8016 = 0, i8017 = 0, i8018 = 0, i8019 = 0;
+        int i8020 = 0, i8021 = 0, i8022 = 0, i8023 = 0, i8024 = 0, i8025 = 0, i8026 = 0, i8027 = 0, i8028 = 0, i8029 = 0;
+        int i8030 = 0, i8031 = 0, i8032 = 0, i8033 = 0, i8034 = 0, i8035 = 0, i8036 = 0, i8037 = 0, i8038 = 0, i8039 = 0;
+        int i8040 = 0, i8041 = 0, i8042 = 0, i8043 = 0, i8044 = 0, i8045 = 0, i8046 = 0, i8047 = 0, i8048 = 0, i8049 = 0;
+        int i8050 = 0, i8051 = 0, i8052 = 0, i8053 = 0, i8054 = 0, i8055 = 0, i8056 = 0, i8057 = 0, i8058 = 0, i8059 = 0;
+        int i8060 = 0, i8061 = 0, i8062 = 0, i8063 = 0, i8064 = 0, i8065 = 0, i8066 = 0, i8067 = 0, i8068 = 0, i8069 = 0;
+        int i8070 = 0, i8071 = 0, i8072 = 0, i8073 = 0, i8074 = 0, i8075 = 0, i8076 = 0, i8077 = 0, i8078 = 0, i8079 = 0;
+        int i8080 = 0, i8081 = 0, i8082 = 0, i8083 = 0, i8084 = 0, i8085 = 0, i8086 = 0, i8087 = 0, i8088 = 0, i8089 = 0;
+        int i8090 = 0, i8091 = 0, i8092 = 0, i8093 = 0, i8094 = 0, i8095 = 0, i8096 = 0, i8097 = 0, i8098 = 0, i8099 = 0;
+        int i8100 = 0, i8101 = 0, i8102 = 0, i8103 = 0, i8104 = 0, i8105 = 0, i8106 = 0, i8107 = 0, i8108 = 0, i8109 = 0;
+        int i8110 = 0, i8111 = 0, i8112 = 0, i8113 = 0, i8114 = 0, i8115 = 0, i8116 = 0, i8117 = 0, i8118 = 0, i8119 = 0;
+        int i8120 = 0, i8121 = 0, i8122 = 0, i8123 = 0, i8124 = 0, i8125 = 0, i8126 = 0, i8127 = 0, i8128 = 0, i8129 = 0;
+        int i8130 = 0, i8131 = 0, i8132 = 0, i8133 = 0, i8134 = 0, i8135 = 0, i8136 = 0, i8137 = 0, i8138 = 0, i8139 = 0;
+        int i8140 = 0, i8141 = 0, i8142 = 0, i8143 = 0, i8144 = 0, i8145 = 0, i8146 = 0, i8147 = 0, i8148 = 0, i8149 = 0;
+        int i8150 = 0, i8151 = 0, i8152 = 0, i8153 = 0, i8154 = 0, i8155 = 0, i8156 = 0, i8157 = 0, i8158 = 0, i8159 = 0;
+        int i8160 = 0, i8161 = 0, i8162 = 0, i8163 = 0, i8164 = 0, i8165 = 0, i8166 = 0, i8167 = 0, i8168 = 0, i8169 = 0;
+        int i8170 = 0, i8171 = 0, i8172 = 0, i8173 = 0, i8174 = 0, i8175 = 0, i8176 = 0, i8177 = 0, i8178 = 0, i8179 = 0;
+        int i8180 = 0, i8181 = 0, i8182 = 0, i8183 = 0, i8184 = 0, i8185 = 0, i8186 = 0, i8187 = 0, i8188 = 0, i8189 = 0;
+        int i8190 = 0, i8191 = 0, i8192 = 0, i8193 = 0, i8194 = 0, i8195 = 0, i8196 = 0, i8197 = 0, i8198 = 0, i8199 = 0;
+        int i8200 = 0, i8201 = 0, i8202 = 0, i8203 = 0, i8204 = 0, i8205 = 0, i8206 = 0, i8207 = 0, i8208 = 0, i8209 = 0;
+        int i8210 = 0, i8211 = 0, i8212 = 0, i8213 = 0, i8214 = 0, i8215 = 0, i8216 = 0, i8217 = 0, i8218 = 0, i8219 = 0;
+        int i8220 = 0, i8221 = 0, i8222 = 0, i8223 = 0, i8224 = 0, i8225 = 0, i8226 = 0, i8227 = 0, i8228 = 0, i8229 = 0;
+        int i8230 = 0, i8231 = 0, i8232 = 0, i8233 = 0, i8234 = 0, i8235 = 0, i8236 = 0, i8237 = 0, i8238 = 0, i8239 = 0;
+        int i8240 = 0, i8241 = 0, i8242 = 0, i8243 = 0, i8244 = 0, i8245 = 0, i8246 = 0, i8247 = 0, i8248 = 0, i8249 = 0;
+        int i8250 = 0, i8251 = 0, i8252 = 0, i8253 = 0, i8254 = 0, i8255 = 0, i8256 = 0, i8257 = 0, i8258 = 0, i8259 = 0;
+        int i8260 = 0, i8261 = 0, i8262 = 0, i8263 = 0, i8264 = 0, i8265 = 0, i8266 = 0, i8267 = 0, i8268 = 0, i8269 = 0;
+        int i8270 = 0, i8271 = 0, i8272 = 0, i8273 = 0, i8274 = 0, i8275 = 0, i8276 = 0, i8277 = 0, i8278 = 0, i8279 = 0;
+        int i8280 = 0, i8281 = 0, i8282 = 0, i8283 = 0, i8284 = 0, i8285 = 0, i8286 = 0, i8287 = 0, i8288 = 0, i8289 = 0;
+        int i8290 = 0, i8291 = 0, i8292 = 0, i8293 = 0, i8294 = 0, i8295 = 0, i8296 = 0, i8297 = 0, i8298 = 0, i8299 = 0;
+        int i8300 = 0, i8301 = 0, i8302 = 0, i8303 = 0, i8304 = 0, i8305 = 0, i8306 = 0, i8307 = 0, i8308 = 0, i8309 = 0;
+        int i8310 = 0, i8311 = 0, i8312 = 0, i8313 = 0, i8314 = 0, i8315 = 0, i8316 = 0, i8317 = 0, i8318 = 0, i8319 = 0;
+        int i8320 = 0, i8321 = 0, i8322 = 0, i8323 = 0, i8324 = 0, i8325 = 0, i8326 = 0, i8327 = 0, i8328 = 0, i8329 = 0;
+        int i8330 = 0, i8331 = 0, i8332 = 0, i8333 = 0, i8334 = 0, i8335 = 0, i8336 = 0, i8337 = 0, i8338 = 0, i8339 = 0;
+        int i8340 = 0, i8341 = 0, i8342 = 0, i8343 = 0, i8344 = 0, i8345 = 0, i8346 = 0, i8347 = 0, i8348 = 0, i8349 = 0;
+        int i8350 = 0, i8351 = 0, i8352 = 0, i8353 = 0, i8354 = 0, i8355 = 0, i8356 = 0, i8357 = 0, i8358 = 0, i8359 = 0;
+        int i8360 = 0, i8361 = 0, i8362 = 0, i8363 = 0, i8364 = 0, i8365 = 0, i8366 = 0, i8367 = 0, i8368 = 0, i8369 = 0;
+        int i8370 = 0, i8371 = 0, i8372 = 0, i8373 = 0, i8374 = 0, i8375 = 0, i8376 = 0, i8377 = 0, i8378 = 0, i8379 = 0;
+        int i8380 = 0, i8381 = 0, i8382 = 0, i8383 = 0, i8384 = 0, i8385 = 0, i8386 = 0, i8387 = 0, i8388 = 0, i8389 = 0;
+        int i8390 = 0, i8391 = 0, i8392 = 0, i8393 = 0, i8394 = 0, i8395 = 0, i8396 = 0, i8397 = 0, i8398 = 0, i8399 = 0;
+        int i8400 = 0, i8401 = 0, i8402 = 0, i8403 = 0, i8404 = 0, i8405 = 0, i8406 = 0, i8407 = 0, i8408 = 0, i8409 = 0;
+        int i8410 = 0, i8411 = 0, i8412 = 0, i8413 = 0, i8414 = 0, i8415 = 0, i8416 = 0, i8417 = 0, i8418 = 0, i8419 = 0;
+        int i8420 = 0, i8421 = 0, i8422 = 0, i8423 = 0, i8424 = 0, i8425 = 0, i8426 = 0, i8427 = 0, i8428 = 0, i8429 = 0;
+        int i8430 = 0, i8431 = 0, i8432 = 0, i8433 = 0, i8434 = 0, i8435 = 0, i8436 = 0, i8437 = 0, i8438 = 0, i8439 = 0;
+        int i8440 = 0, i8441 = 0, i8442 = 0, i8443 = 0, i8444 = 0, i8445 = 0, i8446 = 0, i8447 = 0, i8448 = 0, i8449 = 0;
+        int i8450 = 0, i8451 = 0, i8452 = 0, i8453 = 0, i8454 = 0, i8455 = 0, i8456 = 0, i8457 = 0, i8458 = 0, i8459 = 0;
+        int i8460 = 0, i8461 = 0, i8462 = 0, i8463 = 0, i8464 = 0, i8465 = 0, i8466 = 0, i8467 = 0, i8468 = 0, i8469 = 0;
+        int i8470 = 0, i8471 = 0, i8472 = 0, i8473 = 0, i8474 = 0, i8475 = 0, i8476 = 0, i8477 = 0, i8478 = 0, i8479 = 0;
+        int i8480 = 0, i8481 = 0, i8482 = 0, i8483 = 0, i8484 = 0, i8485 = 0, i8486 = 0, i8487 = 0, i8488 = 0, i8489 = 0;
+        int i8490 = 0, i8491 = 0, i8492 = 0, i8493 = 0, i8494 = 0, i8495 = 0, i8496 = 0, i8497 = 0, i8498 = 0, i8499 = 0;
+        int i8500 = 0, i8501 = 0, i8502 = 0, i8503 = 0, i8504 = 0, i8505 = 0, i8506 = 0, i8507 = 0, i8508 = 0, i8509 = 0;
+        int i8510 = 0, i8511 = 0, i8512 = 0, i8513 = 0, i8514 = 0, i8515 = 0, i8516 = 0, i8517 = 0, i8518 = 0, i8519 = 0;
+        int i8520 = 0, i8521 = 0, i8522 = 0, i8523 = 0, i8524 = 0, i8525 = 0, i8526 = 0, i8527 = 0, i8528 = 0, i8529 = 0;
+        int i8530 = 0, i8531 = 0, i8532 = 0, i8533 = 0, i8534 = 0, i8535 = 0, i8536 = 0, i8537 = 0, i8538 = 0, i8539 = 0;
+        int i8540 = 0, i8541 = 0, i8542 = 0, i8543 = 0, i8544 = 0, i8545 = 0, i8546 = 0, i8547 = 0, i8548 = 0, i8549 = 0;
+        int i8550 = 0, i8551 = 0, i8552 = 0, i8553 = 0, i8554 = 0, i8555 = 0, i8556 = 0, i8557 = 0, i8558 = 0, i8559 = 0;
+        int i8560 = 0, i8561 = 0, i8562 = 0, i8563 = 0, i8564 = 0, i8565 = 0, i8566 = 0, i8567 = 0, i8568 = 0, i8569 = 0;
+        int i8570 = 0, i8571 = 0, i8572 = 0, i8573 = 0, i8574 = 0, i8575 = 0, i8576 = 0, i8577 = 0, i8578 = 0, i8579 = 0;
+        int i8580 = 0, i8581 = 0, i8582 = 0, i8583 = 0, i8584 = 0, i8585 = 0, i8586 = 0, i8587 = 0, i8588 = 0, i8589 = 0;
+        int i8590 = 0, i8591 = 0, i8592 = 0, i8593 = 0, i8594 = 0, i8595 = 0, i8596 = 0, i8597 = 0, i8598 = 0, i8599 = 0;
+        int i8600 = 0, i8601 = 0, i8602 = 0, i8603 = 0, i8604 = 0, i8605 = 0, i8606 = 0, i8607 = 0, i8608 = 0, i8609 = 0;
+        int i8610 = 0, i8611 = 0, i8612 = 0, i8613 = 0, i8614 = 0, i8615 = 0, i8616 = 0, i8617 = 0, i8618 = 0, i8619 = 0;
+        int i8620 = 0, i8621 = 0, i8622 = 0, i8623 = 0, i8624 = 0, i8625 = 0, i8626 = 0, i8627 = 0, i8628 = 0, i8629 = 0;
+        int i8630 = 0, i8631 = 0, i8632 = 0, i8633 = 0, i8634 = 0, i8635 = 0, i8636 = 0, i8637 = 0, i8638 = 0, i8639 = 0;
+        int i8640 = 0, i8641 = 0, i8642 = 0, i8643 = 0, i8644 = 0, i8645 = 0, i8646 = 0, i8647 = 0, i8648 = 0, i8649 = 0;
+        int i8650 = 0, i8651 = 0, i8652 = 0, i8653 = 0, i8654 = 0, i8655 = 0, i8656 = 0, i8657 = 0, i8658 = 0, i8659 = 0;
+        int i8660 = 0, i8661 = 0, i8662 = 0, i8663 = 0, i8664 = 0, i8665 = 0, i8666 = 0, i8667 = 0, i8668 = 0, i8669 = 0;
+        int i8670 = 0, i8671 = 0, i8672 = 0, i8673 = 0, i8674 = 0, i8675 = 0, i8676 = 0, i8677 = 0, i8678 = 0, i8679 = 0;
+        int i8680 = 0, i8681 = 0, i8682 = 0, i8683 = 0, i8684 = 0, i8685 = 0, i8686 = 0, i8687 = 0, i8688 = 0, i8689 = 0;
+        int i8690 = 0, i8691 = 0, i8692 = 0, i8693 = 0, i8694 = 0, i8695 = 0, i8696 = 0, i8697 = 0, i8698 = 0, i8699 = 0;
+        int i8700 = 0, i8701 = 0, i8702 = 0, i8703 = 0, i8704 = 0, i8705 = 0, i8706 = 0, i8707 = 0, i8708 = 0, i8709 = 0;
+        int i8710 = 0, i8711 = 0, i8712 = 0, i8713 = 0, i8714 = 0, i8715 = 0, i8716 = 0, i8717 = 0, i8718 = 0, i8719 = 0;
+        int i8720 = 0, i8721 = 0, i8722 = 0, i8723 = 0, i8724 = 0, i8725 = 0, i8726 = 0, i8727 = 0, i8728 = 0, i8729 = 0;
+        int i8730 = 0, i8731 = 0, i8732 = 0, i8733 = 0, i8734 = 0, i8735 = 0, i8736 = 0, i8737 = 0, i8738 = 0, i8739 = 0;
+        int i8740 = 0, i8741 = 0, i8742 = 0, i8743 = 0, i8744 = 0, i8745 = 0, i8746 = 0, i8747 = 0, i8748 = 0, i8749 = 0;
+        int i8750 = 0, i8751 = 0, i8752 = 0, i8753 = 0, i8754 = 0, i8755 = 0, i8756 = 0, i8757 = 0, i8758 = 0, i8759 = 0;
+        int i8760 = 0, i8761 = 0, i8762 = 0, i8763 = 0, i8764 = 0, i8765 = 0, i8766 = 0, i8767 = 0, i8768 = 0, i8769 = 0;
+        int i8770 = 0, i8771 = 0, i8772 = 0, i8773 = 0, i8774 = 0, i8775 = 0, i8776 = 0, i8777 = 0, i8778 = 0, i8779 = 0;
+        int i8780 = 0, i8781 = 0, i8782 = 0, i8783 = 0, i8784 = 0, i8785 = 0, i8786 = 0, i8787 = 0, i8788 = 0, i8789 = 0;
+        int i8790 = 0, i8791 = 0, i8792 = 0, i8793 = 0, i8794 = 0, i8795 = 0, i8796 = 0, i8797 = 0, i8798 = 0, i8799 = 0;
+        int i8800 = 0, i8801 = 0, i8802 = 0, i8803 = 0, i8804 = 0, i8805 = 0, i8806 = 0, i8807 = 0, i8808 = 0, i8809 = 0;
+        int i8810 = 0, i8811 = 0, i8812 = 0, i8813 = 0, i8814 = 0, i8815 = 0, i8816 = 0, i8817 = 0, i8818 = 0, i8819 = 0;
+        int i8820 = 0, i8821 = 0, i8822 = 0, i8823 = 0, i8824 = 0, i8825 = 0, i8826 = 0, i8827 = 0, i8828 = 0, i8829 = 0;
+        int i8830 = 0, i8831 = 0, i8832 = 0, i8833 = 0, i8834 = 0, i8835 = 0, i8836 = 0, i8837 = 0, i8838 = 0, i8839 = 0;
+        int i8840 = 0, i8841 = 0, i8842 = 0, i8843 = 0, i8844 = 0, i8845 = 0, i8846 = 0, i8847 = 0, i8848 = 0, i8849 = 0;
+        int i8850 = 0, i8851 = 0, i8852 = 0, i8853 = 0, i8854 = 0, i8855 = 0, i8856 = 0, i8857 = 0, i8858 = 0, i8859 = 0;
+        int i8860 = 0, i8861 = 0, i8862 = 0, i8863 = 0, i8864 = 0, i8865 = 0, i8866 = 0, i8867 = 0, i8868 = 0, i8869 = 0;
+        int i8870 = 0, i8871 = 0, i8872 = 0, i8873 = 0, i8874 = 0, i8875 = 0, i8876 = 0, i8877 = 0, i8878 = 0, i8879 = 0;
+        int i8880 = 0, i8881 = 0, i8882 = 0, i8883 = 0, i8884 = 0, i8885 = 0, i8886 = 0, i8887 = 0, i8888 = 0, i8889 = 0;
+        int i8890 = 0, i8891 = 0, i8892 = 0, i8893 = 0, i8894 = 0, i8895 = 0, i8896 = 0, i8897 = 0, i8898 = 0, i8899 = 0;
+        int i8900 = 0, i8901 = 0, i8902 = 0, i8903 = 0, i8904 = 0, i8905 = 0, i8906 = 0, i8907 = 0, i8908 = 0, i8909 = 0;
+        int i8910 = 0, i8911 = 0, i8912 = 0, i8913 = 0, i8914 = 0, i8915 = 0, i8916 = 0, i8917 = 0, i8918 = 0, i8919 = 0;
+        int i8920 = 0, i8921 = 0, i8922 = 0, i8923 = 0, i8924 = 0, i8925 = 0, i8926 = 0, i8927 = 0, i8928 = 0, i8929 = 0;
+        int i8930 = 0, i8931 = 0, i8932 = 0, i8933 = 0, i8934 = 0, i8935 = 0, i8936 = 0, i8937 = 0, i8938 = 0, i8939 = 0;
+        int i8940 = 0, i8941 = 0, i8942 = 0, i8943 = 0, i8944 = 0, i8945 = 0, i8946 = 0, i8947 = 0, i8948 = 0, i8949 = 0;
+        int i8950 = 0, i8951 = 0, i8952 = 0, i8953 = 0, i8954 = 0, i8955 = 0, i8956 = 0, i8957 = 0, i8958 = 0, i8959 = 0;
+        int i8960 = 0, i8961 = 0, i8962 = 0, i8963 = 0, i8964 = 0, i8965 = 0, i8966 = 0, i8967 = 0, i8968 = 0, i8969 = 0;
+        int i8970 = 0, i8971 = 0, i8972 = 0, i8973 = 0, i8974 = 0, i8975 = 0, i8976 = 0, i8977 = 0, i8978 = 0, i8979 = 0;
+        int i8980 = 0, i8981 = 0, i8982 = 0, i8983 = 0, i8984 = 0, i8985 = 0, i8986 = 0, i8987 = 0, i8988 = 0, i8989 = 0;
+        int i8990 = 0, i8991 = 0, i8992 = 0, i8993 = 0, i8994 = 0, i8995 = 0, i8996 = 0, i8997 = 0, i8998 = 0, i8999 = 0;
+        int i9000 = 0, i9001 = 0, i9002 = 0, i9003 = 0, i9004 = 0, i9005 = 0, i9006 = 0, i9007 = 0, i9008 = 0, i9009 = 0;
+        int i9010 = 0, i9011 = 0, i9012 = 0, i9013 = 0, i9014 = 0, i9015 = 0, i9016 = 0, i9017 = 0, i9018 = 0, i9019 = 0;
+        int i9020 = 0, i9021 = 0, i9022 = 0, i9023 = 0, i9024 = 0, i9025 = 0, i9026 = 0, i9027 = 0, i9028 = 0, i9029 = 0;
+        int i9030 = 0, i9031 = 0, i9032 = 0, i9033 = 0, i9034 = 0, i9035 = 0, i9036 = 0, i9037 = 0, i9038 = 0, i9039 = 0;
+        int i9040 = 0, i9041 = 0, i9042 = 0, i9043 = 0, i9044 = 0, i9045 = 0, i9046 = 0, i9047 = 0, i9048 = 0, i9049 = 0;
+        int i9050 = 0, i9051 = 0, i9052 = 0, i9053 = 0, i9054 = 0, i9055 = 0, i9056 = 0, i9057 = 0, i9058 = 0, i9059 = 0;
+        int i9060 = 0, i9061 = 0, i9062 = 0, i9063 = 0, i9064 = 0, i9065 = 0, i9066 = 0, i9067 = 0, i9068 = 0, i9069 = 0;
+        int i9070 = 0, i9071 = 0, i9072 = 0, i9073 = 0, i9074 = 0, i9075 = 0, i9076 = 0, i9077 = 0, i9078 = 0, i9079 = 0;
+        int i9080 = 0, i9081 = 0, i9082 = 0, i9083 = 0, i9084 = 0, i9085 = 0, i9086 = 0, i9087 = 0, i9088 = 0, i9089 = 0;
+        int i9090 = 0, i9091 = 0, i9092 = 0, i9093 = 0, i9094 = 0, i9095 = 0, i9096 = 0, i9097 = 0, i9098 = 0, i9099 = 0;
+        int i9100 = 0, i9101 = 0, i9102 = 0, i9103 = 0, i9104 = 0, i9105 = 0, i9106 = 0, i9107 = 0, i9108 = 0, i9109 = 0;
+        int i9110 = 0, i9111 = 0, i9112 = 0, i9113 = 0, i9114 = 0, i9115 = 0, i9116 = 0, i9117 = 0, i9118 = 0, i9119 = 0;
+        int i9120 = 0, i9121 = 0, i9122 = 0, i9123 = 0, i9124 = 0, i9125 = 0, i9126 = 0, i9127 = 0, i9128 = 0, i9129 = 0;
+        int i9130 = 0, i9131 = 0, i9132 = 0, i9133 = 0, i9134 = 0, i9135 = 0, i9136 = 0, i9137 = 0, i9138 = 0, i9139 = 0;
+        int i9140 = 0, i9141 = 0, i9142 = 0, i9143 = 0, i9144 = 0, i9145 = 0, i9146 = 0, i9147 = 0, i9148 = 0, i9149 = 0;
+        int i9150 = 0, i9151 = 0, i9152 = 0, i9153 = 0, i9154 = 0, i9155 = 0, i9156 = 0, i9157 = 0, i9158 = 0, i9159 = 0;
+        int i9160 = 0, i9161 = 0, i9162 = 0, i9163 = 0, i9164 = 0, i9165 = 0, i9166 = 0, i9167 = 0, i9168 = 0, i9169 = 0;
+        int i9170 = 0, i9171 = 0, i9172 = 0, i9173 = 0, i9174 = 0, i9175 = 0, i9176 = 0, i9177 = 0, i9178 = 0, i9179 = 0;
+        int i9180 = 0, i9181 = 0, i9182 = 0, i9183 = 0, i9184 = 0, i9185 = 0, i9186 = 0, i9187 = 0, i9188 = 0, i9189 = 0;
+        int i9190 = 0, i9191 = 0, i9192 = 0, i9193 = 0, i9194 = 0, i9195 = 0, i9196 = 0, i9197 = 0, i9198 = 0, i9199 = 0;
+        int i9200 = 0, i9201 = 0, i9202 = 0, i9203 = 0, i9204 = 0, i9205 = 0, i9206 = 0, i9207 = 0, i9208 = 0, i9209 = 0;
+        int i9210 = 0, i9211 = 0, i9212 = 0, i9213 = 0, i9214 = 0, i9215 = 0, i9216 = 0, i9217 = 0, i9218 = 0, i9219 = 0;
+        int i9220 = 0, i9221 = 0, i9222 = 0, i9223 = 0, i9224 = 0, i9225 = 0, i9226 = 0, i9227 = 0, i9228 = 0, i9229 = 0;
+        int i9230 = 0, i9231 = 0, i9232 = 0, i9233 = 0, i9234 = 0, i9235 = 0, i9236 = 0, i9237 = 0, i9238 = 0, i9239 = 0;
+        int i9240 = 0, i9241 = 0, i9242 = 0, i9243 = 0, i9244 = 0, i9245 = 0, i9246 = 0, i9247 = 0, i9248 = 0, i9249 = 0;
+        int i9250 = 0, i9251 = 0, i9252 = 0, i9253 = 0, i9254 = 0, i9255 = 0, i9256 = 0, i9257 = 0, i9258 = 0, i9259 = 0;
+        int i9260 = 0, i9261 = 0, i9262 = 0, i9263 = 0, i9264 = 0, i9265 = 0, i9266 = 0, i9267 = 0, i9268 = 0, i9269 = 0;
+        int i9270 = 0, i9271 = 0, i9272 = 0, i9273 = 0, i9274 = 0, i9275 = 0, i9276 = 0, i9277 = 0, i9278 = 0, i9279 = 0;
+        int i9280 = 0, i9281 = 0, i9282 = 0, i9283 = 0, i9284 = 0, i9285 = 0, i9286 = 0, i9287 = 0, i9288 = 0, i9289 = 0;
+        int i9290 = 0, i9291 = 0, i9292 = 0, i9293 = 0, i9294 = 0, i9295 = 0, i9296 = 0, i9297 = 0, i9298 = 0, i9299 = 0;
+        int i9300 = 0, i9301 = 0, i9302 = 0, i9303 = 0, i9304 = 0, i9305 = 0, i9306 = 0, i9307 = 0, i9308 = 0, i9309 = 0;
+        int i9310 = 0, i9311 = 0, i9312 = 0, i9313 = 0, i9314 = 0, i9315 = 0, i9316 = 0, i9317 = 0, i9318 = 0, i9319 = 0;
+        int i9320 = 0, i9321 = 0, i9322 = 0, i9323 = 0, i9324 = 0, i9325 = 0, i9326 = 0, i9327 = 0, i9328 = 0, i9329 = 0;
+        int i9330 = 0, i9331 = 0, i9332 = 0, i9333 = 0, i9334 = 0, i9335 = 0, i9336 = 0, i9337 = 0, i9338 = 0, i9339 = 0;
+        int i9340 = 0, i9341 = 0, i9342 = 0, i9343 = 0, i9344 = 0, i9345 = 0, i9346 = 0, i9347 = 0, i9348 = 0, i9349 = 0;
+        int i9350 = 0, i9351 = 0, i9352 = 0, i9353 = 0, i9354 = 0, i9355 = 0, i9356 = 0, i9357 = 0, i9358 = 0, i9359 = 0;
+        int i9360 = 0, i9361 = 0, i9362 = 0, i9363 = 0, i9364 = 0, i9365 = 0, i9366 = 0, i9367 = 0, i9368 = 0, i9369 = 0;
+        int i9370 = 0, i9371 = 0, i9372 = 0, i9373 = 0, i9374 = 0, i9375 = 0, i9376 = 0, i9377 = 0, i9378 = 0, i9379 = 0;
+        int i9380 = 0, i9381 = 0, i9382 = 0, i9383 = 0, i9384 = 0, i9385 = 0, i9386 = 0, i9387 = 0, i9388 = 0, i9389 = 0;
+        int i9390 = 0, i9391 = 0, i9392 = 0, i9393 = 0, i9394 = 0, i9395 = 0, i9396 = 0, i9397 = 0, i9398 = 0, i9399 = 0;
+        int i9400 = 0, i9401 = 0, i9402 = 0, i9403 = 0, i9404 = 0, i9405 = 0, i9406 = 0, i9407 = 0, i9408 = 0, i9409 = 0;
+        int i9410 = 0, i9411 = 0, i9412 = 0, i9413 = 0, i9414 = 0, i9415 = 0, i9416 = 0, i9417 = 0, i9418 = 0, i9419 = 0;
+        int i9420 = 0, i9421 = 0, i9422 = 0, i9423 = 0, i9424 = 0, i9425 = 0, i9426 = 0, i9427 = 0, i9428 = 0, i9429 = 0;
+        int i9430 = 0, i9431 = 0, i9432 = 0, i9433 = 0, i9434 = 0, i9435 = 0, i9436 = 0, i9437 = 0, i9438 = 0, i9439 = 0;
+        int i9440 = 0, i9441 = 0, i9442 = 0, i9443 = 0, i9444 = 0, i9445 = 0, i9446 = 0, i9447 = 0, i9448 = 0, i9449 = 0;
+        int i9450 = 0, i9451 = 0, i9452 = 0, i9453 = 0, i9454 = 0, i9455 = 0, i9456 = 0, i9457 = 0, i9458 = 0, i9459 = 0;
+        int i9460 = 0, i9461 = 0, i9462 = 0, i9463 = 0, i9464 = 0, i9465 = 0, i9466 = 0, i9467 = 0, i9468 = 0, i9469 = 0;
+        int i9470 = 0, i9471 = 0, i9472 = 0, i9473 = 0, i9474 = 0, i9475 = 0, i9476 = 0, i9477 = 0, i9478 = 0, i9479 = 0;
+        int i9480 = 0, i9481 = 0, i9482 = 0, i9483 = 0, i9484 = 0, i9485 = 0, i9486 = 0, i9487 = 0, i9488 = 0, i9489 = 0;
+        int i9490 = 0, i9491 = 0, i9492 = 0, i9493 = 0, i9494 = 0, i9495 = 0, i9496 = 0, i9497 = 0, i9498 = 0, i9499 = 0;
+        int i9500 = 0, i9501 = 0, i9502 = 0, i9503 = 0, i9504 = 0, i9505 = 0, i9506 = 0, i9507 = 0, i9508 = 0, i9509 = 0;
+        int i9510 = 0, i9511 = 0, i9512 = 0, i9513 = 0, i9514 = 0, i9515 = 0, i9516 = 0, i9517 = 0, i9518 = 0, i9519 = 0;
+        int i9520 = 0, i9521 = 0, i9522 = 0, i9523 = 0, i9524 = 0, i9525 = 0, i9526 = 0, i9527 = 0, i9528 = 0, i9529 = 0;
+        int i9530 = 0, i9531 = 0, i9532 = 0, i9533 = 0, i9534 = 0, i9535 = 0, i9536 = 0, i9537 = 0, i9538 = 0, i9539 = 0;
+        int i9540 = 0, i9541 = 0, i9542 = 0, i9543 = 0, i9544 = 0, i9545 = 0, i9546 = 0, i9547 = 0, i9548 = 0, i9549 = 0;
+        int i9550 = 0, i9551 = 0, i9552 = 0, i9553 = 0, i9554 = 0, i9555 = 0, i9556 = 0, i9557 = 0, i9558 = 0, i9559 = 0;
+        int i9560 = 0, i9561 = 0, i9562 = 0, i9563 = 0, i9564 = 0, i9565 = 0, i9566 = 0, i9567 = 0, i9568 = 0, i9569 = 0;
+        int i9570 = 0, i9571 = 0, i9572 = 0, i9573 = 0, i9574 = 0, i9575 = 0, i9576 = 0, i9577 = 0, i9578 = 0, i9579 = 0;
+        int i9580 = 0, i9581 = 0, i9582 = 0, i9583 = 0, i9584 = 0, i9585 = 0, i9586 = 0, i9587 = 0, i9588 = 0, i9589 = 0;
+        int i9590 = 0, i9591 = 0, i9592 = 0, i9593 = 0, i9594 = 0, i9595 = 0, i9596 = 0, i9597 = 0, i9598 = 0, i9599 = 0;
+        int i9600 = 0, i9601 = 0, i9602 = 0, i9603 = 0, i9604 = 0, i9605 = 0, i9606 = 0, i9607 = 0, i9608 = 0, i9609 = 0;
+        int i9610 = 0, i9611 = 0, i9612 = 0, i9613 = 0, i9614 = 0, i9615 = 0, i9616 = 0, i9617 = 0, i9618 = 0, i9619 = 0;
+        int i9620 = 0, i9621 = 0, i9622 = 0, i9623 = 0, i9624 = 0, i9625 = 0, i9626 = 0, i9627 = 0, i9628 = 0, i9629 = 0;
+        int i9630 = 0, i9631 = 0, i9632 = 0, i9633 = 0, i9634 = 0, i9635 = 0, i9636 = 0, i9637 = 0, i9638 = 0, i9639 = 0;
+        int i9640 = 0, i9641 = 0, i9642 = 0, i9643 = 0, i9644 = 0, i9645 = 0, i9646 = 0, i9647 = 0, i9648 = 0, i9649 = 0;
+        int i9650 = 0, i9651 = 0, i9652 = 0, i9653 = 0, i9654 = 0, i9655 = 0, i9656 = 0, i9657 = 0, i9658 = 0, i9659 = 0;
+        int i9660 = 0, i9661 = 0, i9662 = 0, i9663 = 0, i9664 = 0, i9665 = 0, i9666 = 0, i9667 = 0, i9668 = 0, i9669 = 0;
+        int i9670 = 0, i9671 = 0, i9672 = 0, i9673 = 0, i9674 = 0, i9675 = 0, i9676 = 0, i9677 = 0, i9678 = 0, i9679 = 0;
+        int i9680 = 0, i9681 = 0, i9682 = 0, i9683 = 0, i9684 = 0, i9685 = 0, i9686 = 0, i9687 = 0, i9688 = 0, i9689 = 0;
+        int i9690 = 0, i9691 = 0, i9692 = 0, i9693 = 0, i9694 = 0, i9695 = 0, i9696 = 0, i9697 = 0, i9698 = 0, i9699 = 0;
+        int i9700 = 0, i9701 = 0, i9702 = 0, i9703 = 0, i9704 = 0, i9705 = 0, i9706 = 0, i9707 = 0, i9708 = 0, i9709 = 0;
+        int i9710 = 0, i9711 = 0, i9712 = 0, i9713 = 0, i9714 = 0, i9715 = 0, i9716 = 0, i9717 = 0, i9718 = 0, i9719 = 0;
+        int i9720 = 0, i9721 = 0, i9722 = 0, i9723 = 0, i9724 = 0, i9725 = 0, i9726 = 0, i9727 = 0, i9728 = 0, i9729 = 0;
+        int i9730 = 0, i9731 = 0, i9732 = 0, i9733 = 0, i9734 = 0, i9735 = 0, i9736 = 0, i9737 = 0, i9738 = 0, i9739 = 0;
+        int i9740 = 0, i9741 = 0, i9742 = 0, i9743 = 0, i9744 = 0, i9745 = 0, i9746 = 0, i9747 = 0, i9748 = 0, i9749 = 0;
+        int i9750 = 0, i9751 = 0, i9752 = 0, i9753 = 0, i9754 = 0, i9755 = 0, i9756 = 0, i9757 = 0, i9758 = 0, i9759 = 0;
+        int i9760 = 0, i9761 = 0, i9762 = 0, i9763 = 0, i9764 = 0, i9765 = 0, i9766 = 0, i9767 = 0, i9768 = 0, i9769 = 0;
+        int i9770 = 0, i9771 = 0, i9772 = 0, i9773 = 0, i9774 = 0, i9775 = 0, i9776 = 0, i9777 = 0, i9778 = 0, i9779 = 0;
+        int i9780 = 0, i9781 = 0, i9782 = 0, i9783 = 0, i9784 = 0, i9785 = 0, i9786 = 0, i9787 = 0, i9788 = 0, i9789 = 0;
+        int i9790 = 0, i9791 = 0, i9792 = 0, i9793 = 0, i9794 = 0, i9795 = 0, i9796 = 0, i9797 = 0, i9798 = 0, i9799 = 0;
+        int i9800 = 0, i9801 = 0, i9802 = 0, i9803 = 0, i9804 = 0, i9805 = 0, i9806 = 0, i9807 = 0, i9808 = 0, i9809 = 0;
+        int i9810 = 0, i9811 = 0, i9812 = 0, i9813 = 0, i9814 = 0, i9815 = 0, i9816 = 0, i9817 = 0, i9818 = 0, i9819 = 0;
+        int i9820 = 0, i9821 = 0, i9822 = 0, i9823 = 0, i9824 = 0, i9825 = 0, i9826 = 0, i9827 = 0, i9828 = 0, i9829 = 0;
+        int i9830 = 0, i9831 = 0, i9832 = 0, i9833 = 0, i9834 = 0, i9835 = 0, i9836 = 0, i9837 = 0, i9838 = 0, i9839 = 0;
+        int i9840 = 0, i9841 = 0, i9842 = 0, i9843 = 0, i9844 = 0, i9845 = 0, i9846 = 0, i9847 = 0, i9848 = 0, i9849 = 0;
+        int i9850 = 0, i9851 = 0, i9852 = 0, i9853 = 0, i9854 = 0, i9855 = 0, i9856 = 0, i9857 = 0, i9858 = 0, i9859 = 0;
+        int i9860 = 0, i9861 = 0, i9862 = 0, i9863 = 0, i9864 = 0, i9865 = 0, i9866 = 0, i9867 = 0, i9868 = 0, i9869 = 0;
+        int i9870 = 0, i9871 = 0, i9872 = 0, i9873 = 0, i9874 = 0, i9875 = 0, i9876 = 0, i9877 = 0, i9878 = 0, i9879 = 0;
+        int i9880 = 0, i9881 = 0, i9882 = 0, i9883 = 0, i9884 = 0, i9885 = 0, i9886 = 0, i9887 = 0, i9888 = 0, i9889 = 0;
+        int i9890 = 0, i9891 = 0, i9892 = 0, i9893 = 0, i9894 = 0, i9895 = 0, i9896 = 0, i9897 = 0, i9898 = 0, i9899 = 0;
+        int i9900 = 0, i9901 = 0, i9902 = 0, i9903 = 0, i9904 = 0, i9905 = 0, i9906 = 0, i9907 = 0, i9908 = 0, i9909 = 0;
+        int i9910 = 0, i9911 = 0, i9912 = 0, i9913 = 0, i9914 = 0, i9915 = 0, i9916 = 0, i9917 = 0, i9918 = 0, i9919 = 0;
+        int i9920 = 0, i9921 = 0, i9922 = 0, i9923 = 0, i9924 = 0, i9925 = 0, i9926 = 0, i9927 = 0, i9928 = 0, i9929 = 0;
+        int i9930 = 0, i9931 = 0, i9932 = 0, i9933 = 0, i9934 = 0, i9935 = 0, i9936 = 0, i9937 = 0, i9938 = 0, i9939 = 0;
+        int i9940 = 0, i9941 = 0, i9942 = 0, i9943 = 0, i9944 = 0, i9945 = 0, i9946 = 0, i9947 = 0, i9948 = 0, i9949 = 0;
+        int i9950 = 0, i9951 = 0, i9952 = 0, i9953 = 0, i9954 = 0, i9955 = 0, i9956 = 0, i9957 = 0, i9958 = 0, i9959 = 0;
+        int i9960 = 0, i9961 = 0, i9962 = 0, i9963 = 0, i9964 = 0, i9965 = 0, i9966 = 0, i9967 = 0, i9968 = 0, i9969 = 0;
+        int i9970 = 0, i9971 = 0, i9972 = 0, i9973 = 0, i9974 = 0, i9975 = 0, i9976 = 0, i9977 = 0, i9978 = 0, i9979 = 0;
+        int i9980 = 0, i9981 = 0, i9982 = 0, i9983 = 0, i9984 = 0, i9985 = 0, i9986 = 0, i9987 = 0, i9988 = 0, i9989 = 0;
+        int i9990 = 0, i9991 = 0, i9992 = 0, i9993 = 0, i9994 = 0, i9995 = 0, i9996 = 0, i9997 = 0, i9998 = 0, i9999 = 0;
+        int i10000 = 0, i10001 = 0, i10002 = 0, i10003 = 0, i10004 = 0, i10005 = 0, i10006 = 0, i10007 = 0, i10008 = 0, i10009 = 0;
+        int i10010 = 0, i10011 = 0, i10012 = 0, i10013 = 0, i10014 = 0, i10015 = 0, i10016 = 0, i10017 = 0, i10018 = 0, i10019 = 0;
+        int i10020 = 0, i10021 = 0, i10022 = 0, i10023 = 0, i10024 = 0, i10025 = 0, i10026 = 0, i10027 = 0, i10028 = 0, i10029 = 0;
+        int i10030 = 0, i10031 = 0, i10032 = 0, i10033 = 0, i10034 = 0, i10035 = 0, i10036 = 0, i10037 = 0, i10038 = 0, i10039 = 0;
+        int i10040 = 0, i10041 = 0, i10042 = 0, i10043 = 0, i10044 = 0, i10045 = 0, i10046 = 0, i10047 = 0, i10048 = 0, i10049 = 0;
+        int i10050 = 0, i10051 = 0, i10052 = 0, i10053 = 0, i10054 = 0, i10055 = 0, i10056 = 0, i10057 = 0, i10058 = 0, i10059 = 0;
+        int i10060 = 0, i10061 = 0, i10062 = 0, i10063 = 0, i10064 = 0, i10065 = 0, i10066 = 0, i10067 = 0, i10068 = 0, i10069 = 0;
+        int i10070 = 0, i10071 = 0, i10072 = 0, i10073 = 0, i10074 = 0, i10075 = 0, i10076 = 0, i10077 = 0, i10078 = 0, i10079 = 0;
+        int i10080 = 0, i10081 = 0, i10082 = 0, i10083 = 0, i10084 = 0, i10085 = 0, i10086 = 0, i10087 = 0, i10088 = 0, i10089 = 0;
+        int i10090 = 0, i10091 = 0, i10092 = 0, i10093 = 0, i10094 = 0, i10095 = 0, i10096 = 0, i10097 = 0, i10098 = 0, i10099 = 0;
+        int i10100 = 0, i10101 = 0, i10102 = 0, i10103 = 0, i10104 = 0, i10105 = 0, i10106 = 0, i10107 = 0, i10108 = 0, i10109 = 0;
+        int i10110 = 0, i10111 = 0, i10112 = 0, i10113 = 0, i10114 = 0, i10115 = 0, i10116 = 0, i10117 = 0, i10118 = 0, i10119 = 0;
+        int i10120 = 0, i10121 = 0, i10122 = 0, i10123 = 0, i10124 = 0, i10125 = 0, i10126 = 0, i10127 = 0, i10128 = 0, i10129 = 0;
+        int i10130 = 0, i10131 = 0, i10132 = 0, i10133 = 0, i10134 = 0, i10135 = 0, i10136 = 0, i10137 = 0, i10138 = 0, i10139 = 0;
+        int i10140 = 0, i10141 = 0, i10142 = 0, i10143 = 0, i10144 = 0, i10145 = 0, i10146 = 0, i10147 = 0, i10148 = 0, i10149 = 0;
+        int i10150 = 0, i10151 = 0, i10152 = 0, i10153 = 0, i10154 = 0, i10155 = 0, i10156 = 0, i10157 = 0, i10158 = 0, i10159 = 0;
+        int i10160 = 0, i10161 = 0, i10162 = 0, i10163 = 0, i10164 = 0, i10165 = 0, i10166 = 0, i10167 = 0, i10168 = 0, i10169 = 0;
+        int i10170 = 0, i10171 = 0, i10172 = 0, i10173 = 0, i10174 = 0, i10175 = 0, i10176 = 0, i10177 = 0, i10178 = 0, i10179 = 0;
+        int i10180 = 0, i10181 = 0, i10182 = 0, i10183 = 0, i10184 = 0, i10185 = 0, i10186 = 0, i10187 = 0, i10188 = 0, i10189 = 0;
+        int i10190 = 0, i10191 = 0, i10192 = 0, i10193 = 0, i10194 = 0, i10195 = 0, i10196 = 0, i10197 = 0, i10198 = 0, i10199 = 0;
+        int i10200 = 0, i10201 = 0, i10202 = 0, i10203 = 0, i10204 = 0, i10205 = 0, i10206 = 0, i10207 = 0, i10208 = 0, i10209 = 0;
+        int i10210 = 0, i10211 = 0, i10212 = 0, i10213 = 0, i10214 = 0, i10215 = 0, i10216 = 0, i10217 = 0, i10218 = 0, i10219 = 0;
+        int i10220 = 0, i10221 = 0, i10222 = 0, i10223 = 0, i10224 = 0, i10225 = 0, i10226 = 0, i10227 = 0, i10228 = 0, i10229 = 0;
+        int i10230 = 0, i10231 = 0, i10232 = 0, i10233 = 0, i10234 = 0, i10235 = 0, i10236 = 0, i10237 = 0, i10238 = 0, i10239 = 0;
+        int i10240 = 0, i10241 = 0, i10242 = 0, i10243 = 0, i10244 = 0, i10245 = 0, i10246 = 0, i10247 = 0, i10248 = 0, i10249 = 0;
+        int i10250 = 0, i10251 = 0, i10252 = 0, i10253 = 0, i10254 = 0, i10255 = 0, i10256 = 0, i10257 = 0, i10258 = 0, i10259 = 0;
+        int i10260 = 0, i10261 = 0, i10262 = 0, i10263 = 0, i10264 = 0, i10265 = 0, i10266 = 0, i10267 = 0, i10268 = 0, i10269 = 0;
+        int i10270 = 0, i10271 = 0, i10272 = 0, i10273 = 0, i10274 = 0, i10275 = 0, i10276 = 0, i10277 = 0, i10278 = 0, i10279 = 0;
+        int i10280 = 0, i10281 = 0, i10282 = 0, i10283 = 0, i10284 = 0, i10285 = 0, i10286 = 0, i10287 = 0, i10288 = 0, i10289 = 0;
+        int i10290 = 0, i10291 = 0, i10292 = 0, i10293 = 0, i10294 = 0, i10295 = 0, i10296 = 0, i10297 = 0, i10298 = 0, i10299 = 0;
+        int i10300 = 0, i10301 = 0, i10302 = 0, i10303 = 0, i10304 = 0, i10305 = 0, i10306 = 0, i10307 = 0, i10308 = 0, i10309 = 0;
+        int i10310 = 0, i10311 = 0, i10312 = 0, i10313 = 0, i10314 = 0, i10315 = 0, i10316 = 0, i10317 = 0, i10318 = 0, i10319 = 0;
+        int i10320 = 0, i10321 = 0, i10322 = 0, i10323 = 0, i10324 = 0, i10325 = 0, i10326 = 0, i10327 = 0, i10328 = 0, i10329 = 0;
+        int i10330 = 0, i10331 = 0, i10332 = 0, i10333 = 0, i10334 = 0, i10335 = 0, i10336 = 0, i10337 = 0, i10338 = 0, i10339 = 0;
+        int i10340 = 0, i10341 = 0, i10342 = 0, i10343 = 0, i10344 = 0, i10345 = 0, i10346 = 0, i10347 = 0, i10348 = 0, i10349 = 0;
+        int i10350 = 0, i10351 = 0, i10352 = 0, i10353 = 0, i10354 = 0, i10355 = 0, i10356 = 0, i10357 = 0, i10358 = 0, i10359 = 0;
+        int i10360 = 0, i10361 = 0, i10362 = 0, i10363 = 0, i10364 = 0, i10365 = 0, i10366 = 0, i10367 = 0, i10368 = 0, i10369 = 0;
+        int i10370 = 0, i10371 = 0, i10372 = 0, i10373 = 0, i10374 = 0, i10375 = 0, i10376 = 0, i10377 = 0, i10378 = 0, i10379 = 0;
+        int i10380 = 0, i10381 = 0, i10382 = 0, i10383 = 0, i10384 = 0, i10385 = 0, i10386 = 0, i10387 = 0, i10388 = 0, i10389 = 0;
+        int i10390 = 0, i10391 = 0, i10392 = 0, i10393 = 0, i10394 = 0, i10395 = 0, i10396 = 0, i10397 = 0, i10398 = 0, i10399 = 0;
+        int i10400 = 0, i10401 = 0, i10402 = 0, i10403 = 0, i10404 = 0, i10405 = 0, i10406 = 0, i10407 = 0, i10408 = 0, i10409 = 0;
+        int i10410 = 0, i10411 = 0, i10412 = 0, i10413 = 0, i10414 = 0, i10415 = 0, i10416 = 0, i10417 = 0, i10418 = 0, i10419 = 0;
+        int i10420 = 0, i10421 = 0, i10422 = 0, i10423 = 0, i10424 = 0, i10425 = 0, i10426 = 0, i10427 = 0, i10428 = 0, i10429 = 0;
+        int i10430 = 0, i10431 = 0, i10432 = 0, i10433 = 0, i10434 = 0, i10435 = 0, i10436 = 0, i10437 = 0, i10438 = 0, i10439 = 0;
+        int i10440 = 0, i10441 = 0, i10442 = 0, i10443 = 0, i10444 = 0, i10445 = 0, i10446 = 0, i10447 = 0, i10448 = 0, i10449 = 0;
+        int i10450 = 0, i10451 = 0, i10452 = 0, i10453 = 0, i10454 = 0, i10455 = 0, i10456 = 0, i10457 = 0, i10458 = 0, i10459 = 0;
+        int i10460 = 0, i10461 = 0, i10462 = 0, i10463 = 0, i10464 = 0, i10465 = 0, i10466 = 0, i10467 = 0, i10468 = 0, i10469 = 0;
+        int i10470 = 0, i10471 = 0, i10472 = 0, i10473 = 0, i10474 = 0, i10475 = 0, i10476 = 0, i10477 = 0, i10478 = 0, i10479 = 0;
+        int i10480 = 0, i10481 = 0, i10482 = 0, i10483 = 0, i10484 = 0, i10485 = 0, i10486 = 0, i10487 = 0, i10488 = 0, i10489 = 0;
+        int i10490 = 0, i10491 = 0, i10492 = 0, i10493 = 0, i10494 = 0, i10495 = 0, i10496 = 0, i10497 = 0, i10498 = 0, i10499 = 0;
+        int i10500 = 0, i10501 = 0, i10502 = 0, i10503 = 0, i10504 = 0, i10505 = 0, i10506 = 0, i10507 = 0, i10508 = 0, i10509 = 0;
+        int i10510 = 0, i10511 = 0, i10512 = 0, i10513 = 0, i10514 = 0, i10515 = 0, i10516 = 0, i10517 = 0, i10518 = 0, i10519 = 0;
+        int i10520 = 0, i10521 = 0, i10522 = 0, i10523 = 0, i10524 = 0, i10525 = 0, i10526 = 0, i10527 = 0, i10528 = 0, i10529 = 0;
+        int i10530 = 0, i10531 = 0, i10532 = 0, i10533 = 0, i10534 = 0, i10535 = 0, i10536 = 0, i10537 = 0, i10538 = 0, i10539 = 0;
+        int i10540 = 0, i10541 = 0, i10542 = 0, i10543 = 0, i10544 = 0, i10545 = 0, i10546 = 0, i10547 = 0, i10548 = 0, i10549 = 0;
+        int i10550 = 0, i10551 = 0, i10552 = 0, i10553 = 0, i10554 = 0, i10555 = 0, i10556 = 0, i10557 = 0, i10558 = 0, i10559 = 0;
+        int i10560 = 0, i10561 = 0, i10562 = 0, i10563 = 0, i10564 = 0, i10565 = 0, i10566 = 0, i10567 = 0, i10568 = 0, i10569 = 0;
+        int i10570 = 0, i10571 = 0, i10572 = 0, i10573 = 0, i10574 = 0, i10575 = 0, i10576 = 0, i10577 = 0, i10578 = 0, i10579 = 0;
+        int i10580 = 0, i10581 = 0, i10582 = 0, i10583 = 0, i10584 = 0, i10585 = 0, i10586 = 0, i10587 = 0, i10588 = 0, i10589 = 0;
+        int i10590 = 0, i10591 = 0, i10592 = 0, i10593 = 0, i10594 = 0, i10595 = 0, i10596 = 0, i10597 = 0, i10598 = 0, i10599 = 0;
+        int i10600 = 0, i10601 = 0, i10602 = 0, i10603 = 0, i10604 = 0, i10605 = 0, i10606 = 0, i10607 = 0, i10608 = 0, i10609 = 0;
+        int i10610 = 0, i10611 = 0, i10612 = 0, i10613 = 0, i10614 = 0, i10615 = 0, i10616 = 0, i10617 = 0, i10618 = 0, i10619 = 0;
+        int i10620 = 0, i10621 = 0, i10622 = 0, i10623 = 0, i10624 = 0, i10625 = 0, i10626 = 0, i10627 = 0, i10628 = 0, i10629 = 0;
+        int i10630 = 0, i10631 = 0, i10632 = 0, i10633 = 0, i10634 = 0, i10635 = 0, i10636 = 0, i10637 = 0, i10638 = 0, i10639 = 0;
+        int i10640 = 0, i10641 = 0, i10642 = 0, i10643 = 0, i10644 = 0, i10645 = 0, i10646 = 0, i10647 = 0, i10648 = 0, i10649 = 0;
+        int i10650 = 0, i10651 = 0, i10652 = 0, i10653 = 0, i10654 = 0, i10655 = 0, i10656 = 0, i10657 = 0, i10658 = 0, i10659 = 0;
+        int i10660 = 0, i10661 = 0, i10662 = 0, i10663 = 0, i10664 = 0, i10665 = 0, i10666 = 0, i10667 = 0, i10668 = 0, i10669 = 0;
+        int i10670 = 0, i10671 = 0, i10672 = 0, i10673 = 0, i10674 = 0, i10675 = 0, i10676 = 0, i10677 = 0, i10678 = 0, i10679 = 0;
+        int i10680 = 0, i10681 = 0, i10682 = 0, i10683 = 0, i10684 = 0, i10685 = 0, i10686 = 0, i10687 = 0, i10688 = 0, i10689 = 0;
+        int i10690 = 0, i10691 = 0, i10692 = 0, i10693 = 0, i10694 = 0, i10695 = 0, i10696 = 0, i10697 = 0, i10698 = 0, i10699 = 0;
+        int i10700 = 0, i10701 = 0, i10702 = 0, i10703 = 0, i10704 = 0, i10705 = 0, i10706 = 0, i10707 = 0, i10708 = 0, i10709 = 0;
+        int i10710 = 0, i10711 = 0, i10712 = 0, i10713 = 0, i10714 = 0, i10715 = 0, i10716 = 0, i10717 = 0, i10718 = 0, i10719 = 0;
+        int i10720 = 0, i10721 = 0, i10722 = 0, i10723 = 0, i10724 = 0, i10725 = 0, i10726 = 0, i10727 = 0, i10728 = 0, i10729 = 0;
+        int i10730 = 0, i10731 = 0, i10732 = 0, i10733 = 0, i10734 = 0, i10735 = 0, i10736 = 0, i10737 = 0, i10738 = 0, i10739 = 0;
+        int i10740 = 0, i10741 = 0, i10742 = 0, i10743 = 0, i10744 = 0, i10745 = 0, i10746 = 0, i10747 = 0, i10748 = 0, i10749 = 0;
+        int i10750 = 0, i10751 = 0, i10752 = 0, i10753 = 0, i10754 = 0, i10755 = 0, i10756 = 0, i10757 = 0, i10758 = 0, i10759 = 0;
+        int i10760 = 0, i10761 = 0, i10762 = 0, i10763 = 0, i10764 = 0, i10765 = 0, i10766 = 0, i10767 = 0, i10768 = 0, i10769 = 0;
+        int i10770 = 0, i10771 = 0, i10772 = 0, i10773 = 0, i10774 = 0, i10775 = 0, i10776 = 0, i10777 = 0, i10778 = 0, i10779 = 0;
+        int i10780 = 0, i10781 = 0, i10782 = 0, i10783 = 0, i10784 = 0, i10785 = 0, i10786 = 0, i10787 = 0, i10788 = 0, i10789 = 0;
+        int i10790 = 0, i10791 = 0, i10792 = 0, i10793 = 0, i10794 = 0, i10795 = 0, i10796 = 0, i10797 = 0, i10798 = 0, i10799 = 0;
+        int i10800 = 0, i10801 = 0, i10802 = 0, i10803 = 0, i10804 = 0, i10805 = 0, i10806 = 0, i10807 = 0, i10808 = 0, i10809 = 0;
+        int i10810 = 0, i10811 = 0, i10812 = 0, i10813 = 0, i10814 = 0, i10815 = 0, i10816 = 0, i10817 = 0, i10818 = 0, i10819 = 0;
+        int i10820 = 0, i10821 = 0, i10822 = 0, i10823 = 0, i10824 = 0, i10825 = 0, i10826 = 0, i10827 = 0, i10828 = 0, i10829 = 0;
+        int i10830 = 0, i10831 = 0, i10832 = 0, i10833 = 0, i10834 = 0, i10835 = 0, i10836 = 0, i10837 = 0, i10838 = 0, i10839 = 0;
+        int i10840 = 0, i10841 = 0, i10842 = 0, i10843 = 0, i10844 = 0, i10845 = 0, i10846 = 0, i10847 = 0, i10848 = 0, i10849 = 0;
+        int i10850 = 0, i10851 = 0, i10852 = 0, i10853 = 0, i10854 = 0, i10855 = 0, i10856 = 0, i10857 = 0, i10858 = 0, i10859 = 0;
+        int i10860 = 0, i10861 = 0, i10862 = 0, i10863 = 0, i10864 = 0, i10865 = 0, i10866 = 0, i10867 = 0, i10868 = 0, i10869 = 0;
+        int i10870 = 0, i10871 = 0, i10872 = 0, i10873 = 0, i10874 = 0, i10875 = 0, i10876 = 0, i10877 = 0, i10878 = 0, i10879 = 0;
+        int i10880 = 0, i10881 = 0, i10882 = 0, i10883 = 0, i10884 = 0, i10885 = 0, i10886 = 0, i10887 = 0, i10888 = 0, i10889 = 0;
+        int i10890 = 0, i10891 = 0, i10892 = 0, i10893 = 0, i10894 = 0, i10895 = 0, i10896 = 0, i10897 = 0, i10898 = 0, i10899 = 0;
+        int i10900 = 0, i10901 = 0, i10902 = 0, i10903 = 0, i10904 = 0, i10905 = 0, i10906 = 0, i10907 = 0, i10908 = 0, i10909 = 0;
+        int i10910 = 0, i10911 = 0, i10912 = 0, i10913 = 0, i10914 = 0, i10915 = 0, i10916 = 0, i10917 = 0, i10918 = 0, i10919 = 0;
+        int i10920 = 0, i10921 = 0, i10922 = 0, i10923 = 0, i10924 = 0, i10925 = 0, i10926 = 0, i10927 = 0, i10928 = 0, i10929 = 0;
+        int i10930 = 0, i10931 = 0, i10932 = 0, i10933 = 0, i10934 = 0, i10935 = 0, i10936 = 0, i10937 = 0, i10938 = 0, i10939 = 0;
+        int i10940 = 0, i10941 = 0, i10942 = 0, i10943 = 0, i10944 = 0, i10945 = 0, i10946 = 0, i10947 = 0, i10948 = 0, i10949 = 0;
+        int i10950 = 0, i10951 = 0, i10952 = 0, i10953 = 0, i10954 = 0, i10955 = 0, i10956 = 0, i10957 = 0, i10958 = 0, i10959 = 0;
+        int i10960 = 0, i10961 = 0, i10962 = 0, i10963 = 0, i10964 = 0, i10965 = 0, i10966 = 0, i10967 = 0, i10968 = 0, i10969 = 0;
+        int i10970 = 0, i10971 = 0, i10972 = 0, i10973 = 0, i10974 = 0, i10975 = 0, i10976 = 0, i10977 = 0, i10978 = 0, i10979 = 0;
+        int i10980 = 0, i10981 = 0, i10982 = 0, i10983 = 0, i10984 = 0, i10985 = 0, i10986 = 0, i10987 = 0, i10988 = 0, i10989 = 0;
+        int i10990 = 0, i10991 = 0, i10992 = 0, i10993 = 0, i10994 = 0, i10995 = 0, i10996 = 0, i10997 = 0, i10998 = 0, i10999 = 0;
+        int i11000 = 0, i11001 = 0, i11002 = 0, i11003 = 0, i11004 = 0, i11005 = 0, i11006 = 0, i11007 = 0, i11008 = 0, i11009 = 0;
+        int i11010 = 0, i11011 = 0, i11012 = 0, i11013 = 0, i11014 = 0, i11015 = 0, i11016 = 0, i11017 = 0, i11018 = 0, i11019 = 0;
+        int i11020 = 0, i11021 = 0, i11022 = 0, i11023 = 0, i11024 = 0, i11025 = 0, i11026 = 0, i11027 = 0, i11028 = 0, i11029 = 0;
+        int i11030 = 0, i11031 = 0, i11032 = 0, i11033 = 0, i11034 = 0, i11035 = 0, i11036 = 0, i11037 = 0, i11038 = 0, i11039 = 0;
+        int i11040 = 0, i11041 = 0, i11042 = 0, i11043 = 0, i11044 = 0, i11045 = 0, i11046 = 0, i11047 = 0, i11048 = 0, i11049 = 0;
+        int i11050 = 0, i11051 = 0, i11052 = 0, i11053 = 0, i11054 = 0, i11055 = 0, i11056 = 0, i11057 = 0, i11058 = 0, i11059 = 0;
+        int i11060 = 0, i11061 = 0, i11062 = 0, i11063 = 0, i11064 = 0, i11065 = 0, i11066 = 0, i11067 = 0, i11068 = 0, i11069 = 0;
+        int i11070 = 0, i11071 = 0, i11072 = 0, i11073 = 0, i11074 = 0, i11075 = 0, i11076 = 0, i11077 = 0, i11078 = 0, i11079 = 0;
+        int i11080 = 0, i11081 = 0, i11082 = 0, i11083 = 0, i11084 = 0, i11085 = 0, i11086 = 0, i11087 = 0, i11088 = 0, i11089 = 0;
+        int i11090 = 0, i11091 = 0, i11092 = 0, i11093 = 0, i11094 = 0, i11095 = 0, i11096 = 0, i11097 = 0, i11098 = 0, i11099 = 0;
+        int i11100 = 0, i11101 = 0, i11102 = 0, i11103 = 0, i11104 = 0, i11105 = 0, i11106 = 0, i11107 = 0, i11108 = 0, i11109 = 0;
+        int i11110 = 0, i11111 = 0, i11112 = 0, i11113 = 0, i11114 = 0, i11115 = 0, i11116 = 0, i11117 = 0, i11118 = 0, i11119 = 0;
+        int i11120 = 0, i11121 = 0, i11122 = 0, i11123 = 0, i11124 = 0, i11125 = 0, i11126 = 0, i11127 = 0, i11128 = 0, i11129 = 0;
+        int i11130 = 0, i11131 = 0, i11132 = 0, i11133 = 0, i11134 = 0, i11135 = 0, i11136 = 0, i11137 = 0, i11138 = 0, i11139 = 0;
+        int i11140 = 0, i11141 = 0, i11142 = 0, i11143 = 0, i11144 = 0, i11145 = 0, i11146 = 0, i11147 = 0, i11148 = 0, i11149 = 0;
+        int i11150 = 0, i11151 = 0, i11152 = 0, i11153 = 0, i11154 = 0, i11155 = 0, i11156 = 0, i11157 = 0, i11158 = 0, i11159 = 0;
+        int i11160 = 0, i11161 = 0, i11162 = 0, i11163 = 0, i11164 = 0, i11165 = 0, i11166 = 0, i11167 = 0, i11168 = 0, i11169 = 0;
+        int i11170 = 0, i11171 = 0, i11172 = 0, i11173 = 0, i11174 = 0, i11175 = 0, i11176 = 0, i11177 = 0, i11178 = 0, i11179 = 0;
+        int i11180 = 0, i11181 = 0, i11182 = 0, i11183 = 0, i11184 = 0, i11185 = 0, i11186 = 0, i11187 = 0, i11188 = 0, i11189 = 0;
+        int i11190 = 0, i11191 = 0, i11192 = 0, i11193 = 0, i11194 = 0, i11195 = 0, i11196 = 0, i11197 = 0, i11198 = 0, i11199 = 0;
+        int i11200 = 0, i11201 = 0, i11202 = 0, i11203 = 0, i11204 = 0, i11205 = 0, i11206 = 0, i11207 = 0, i11208 = 0, i11209 = 0;
+        int i11210 = 0, i11211 = 0, i11212 = 0, i11213 = 0, i11214 = 0, i11215 = 0, i11216 = 0, i11217 = 0, i11218 = 0, i11219 = 0;
+        int i11220 = 0, i11221 = 0, i11222 = 0, i11223 = 0, i11224 = 0, i11225 = 0, i11226 = 0, i11227 = 0, i11228 = 0, i11229 = 0;
+        int i11230 = 0, i11231 = 0, i11232 = 0, i11233 = 0, i11234 = 0, i11235 = 0, i11236 = 0, i11237 = 0, i11238 = 0, i11239 = 0;
+        int i11240 = 0, i11241 = 0, i11242 = 0, i11243 = 0, i11244 = 0, i11245 = 0, i11246 = 0, i11247 = 0, i11248 = 0, i11249 = 0;
+        int i11250 = 0, i11251 = 0, i11252 = 0, i11253 = 0, i11254 = 0, i11255 = 0, i11256 = 0, i11257 = 0, i11258 = 0, i11259 = 0;
+        int i11260 = 0, i11261 = 0, i11262 = 0, i11263 = 0, i11264 = 0, i11265 = 0, i11266 = 0, i11267 = 0, i11268 = 0, i11269 = 0;
+        int i11270 = 0, i11271 = 0, i11272 = 0, i11273 = 0, i11274 = 0, i11275 = 0, i11276 = 0, i11277 = 0, i11278 = 0, i11279 = 0;
+        int i11280 = 0, i11281 = 0, i11282 = 0, i11283 = 0, i11284 = 0, i11285 = 0, i11286 = 0, i11287 = 0, i11288 = 0, i11289 = 0;
+        int i11290 = 0, i11291 = 0, i11292 = 0, i11293 = 0, i11294 = 0, i11295 = 0, i11296 = 0, i11297 = 0, i11298 = 0, i11299 = 0;
+        int i11300 = 0, i11301 = 0, i11302 = 0, i11303 = 0, i11304 = 0, i11305 = 0, i11306 = 0, i11307 = 0, i11308 = 0, i11309 = 0;
+        int i11310 = 0, i11311 = 0, i11312 = 0, i11313 = 0, i11314 = 0, i11315 = 0, i11316 = 0, i11317 = 0, i11318 = 0, i11319 = 0;
+        int i11320 = 0, i11321 = 0, i11322 = 0, i11323 = 0, i11324 = 0, i11325 = 0, i11326 = 0, i11327 = 0, i11328 = 0, i11329 = 0;
+        int i11330 = 0, i11331 = 0, i11332 = 0, i11333 = 0, i11334 = 0, i11335 = 0, i11336 = 0, i11337 = 0, i11338 = 0, i11339 = 0;
+        int i11340 = 0, i11341 = 0, i11342 = 0, i11343 = 0, i11344 = 0, i11345 = 0, i11346 = 0, i11347 = 0, i11348 = 0, i11349 = 0;
+        int i11350 = 0, i11351 = 0, i11352 = 0, i11353 = 0, i11354 = 0, i11355 = 0, i11356 = 0, i11357 = 0, i11358 = 0, i11359 = 0;
+        int i11360 = 0, i11361 = 0, i11362 = 0, i11363 = 0, i11364 = 0, i11365 = 0, i11366 = 0, i11367 = 0, i11368 = 0, i11369 = 0;
+        int i11370 = 0, i11371 = 0, i11372 = 0, i11373 = 0, i11374 = 0, i11375 = 0, i11376 = 0, i11377 = 0, i11378 = 0, i11379 = 0;
+        int i11380 = 0, i11381 = 0, i11382 = 0, i11383 = 0, i11384 = 0, i11385 = 0, i11386 = 0, i11387 = 0, i11388 = 0, i11389 = 0;
+        int i11390 = 0, i11391 = 0, i11392 = 0, i11393 = 0, i11394 = 0, i11395 = 0, i11396 = 0, i11397 = 0, i11398 = 0, i11399 = 0;
+        int i11400 = 0, i11401 = 0, i11402 = 0, i11403 = 0, i11404 = 0, i11405 = 0, i11406 = 0, i11407 = 0, i11408 = 0, i11409 = 0;
+        int i11410 = 0, i11411 = 0, i11412 = 0, i11413 = 0, i11414 = 0, i11415 = 0, i11416 = 0, i11417 = 0, i11418 = 0, i11419 = 0;
+        int i11420 = 0, i11421 = 0, i11422 = 0, i11423 = 0, i11424 = 0, i11425 = 0, i11426 = 0, i11427 = 0, i11428 = 0, i11429 = 0;
+        int i11430 = 0, i11431 = 0, i11432 = 0, i11433 = 0, i11434 = 0, i11435 = 0, i11436 = 0, i11437 = 0, i11438 = 0, i11439 = 0;
+        int i11440 = 0, i11441 = 0, i11442 = 0, i11443 = 0, i11444 = 0, i11445 = 0, i11446 = 0, i11447 = 0, i11448 = 0, i11449 = 0;
+        int i11450 = 0, i11451 = 0, i11452 = 0, i11453 = 0, i11454 = 0, i11455 = 0, i11456 = 0, i11457 = 0, i11458 = 0, i11459 = 0;
+        int i11460 = 0, i11461 = 0, i11462 = 0, i11463 = 0, i11464 = 0, i11465 = 0, i11466 = 0, i11467 = 0, i11468 = 0, i11469 = 0;
+        int i11470 = 0, i11471 = 0, i11472 = 0, i11473 = 0, i11474 = 0, i11475 = 0, i11476 = 0, i11477 = 0, i11478 = 0, i11479 = 0;
+        int i11480 = 0, i11481 = 0, i11482 = 0, i11483 = 0, i11484 = 0, i11485 = 0, i11486 = 0, i11487 = 0, i11488 = 0, i11489 = 0;
+        int i11490 = 0, i11491 = 0, i11492 = 0, i11493 = 0, i11494 = 0, i11495 = 0, i11496 = 0, i11497 = 0, i11498 = 0, i11499 = 0;
+        int i11500 = 0, i11501 = 0, i11502 = 0, i11503 = 0, i11504 = 0, i11505 = 0, i11506 = 0, i11507 = 0, i11508 = 0, i11509 = 0;
+        int i11510 = 0, i11511 = 0, i11512 = 0, i11513 = 0, i11514 = 0, i11515 = 0, i11516 = 0, i11517 = 0, i11518 = 0, i11519 = 0;
+        int i11520 = 0, i11521 = 0, i11522 = 0, i11523 = 0, i11524 = 0, i11525 = 0, i11526 = 0, i11527 = 0, i11528 = 0, i11529 = 0;
+        int i11530 = 0, i11531 = 0, i11532 = 0, i11533 = 0, i11534 = 0, i11535 = 0, i11536 = 0, i11537 = 0, i11538 = 0, i11539 = 0;
+        int i11540 = 0, i11541 = 0, i11542 = 0, i11543 = 0, i11544 = 0, i11545 = 0, i11546 = 0, i11547 = 0, i11548 = 0, i11549 = 0;
+        int i11550 = 0, i11551 = 0, i11552 = 0, i11553 = 0, i11554 = 0, i11555 = 0, i11556 = 0, i11557 = 0, i11558 = 0, i11559 = 0;
+        int i11560 = 0, i11561 = 0, i11562 = 0, i11563 = 0, i11564 = 0, i11565 = 0, i11566 = 0, i11567 = 0, i11568 = 0, i11569 = 0;
+        int i11570 = 0, i11571 = 0, i11572 = 0, i11573 = 0, i11574 = 0, i11575 = 0, i11576 = 0, i11577 = 0, i11578 = 0, i11579 = 0;
+        int i11580 = 0, i11581 = 0, i11582 = 0, i11583 = 0, i11584 = 0, i11585 = 0, i11586 = 0, i11587 = 0, i11588 = 0, i11589 = 0;
+        int i11590 = 0, i11591 = 0, i11592 = 0, i11593 = 0, i11594 = 0, i11595 = 0, i11596 = 0, i11597 = 0, i11598 = 0, i11599 = 0;
+        int i11600 = 0, i11601 = 0, i11602 = 0, i11603 = 0, i11604 = 0, i11605 = 0, i11606 = 0, i11607 = 0, i11608 = 0, i11609 = 0;
+        int i11610 = 0, i11611 = 0, i11612 = 0, i11613 = 0, i11614 = 0, i11615 = 0, i11616 = 0, i11617 = 0, i11618 = 0, i11619 = 0;
+        int i11620 = 0, i11621 = 0, i11622 = 0, i11623 = 0, i11624 = 0, i11625 = 0, i11626 = 0, i11627 = 0, i11628 = 0, i11629 = 0;
+        int i11630 = 0, i11631 = 0, i11632 = 0, i11633 = 0, i11634 = 0, i11635 = 0, i11636 = 0, i11637 = 0, i11638 = 0, i11639 = 0;
+        int i11640 = 0, i11641 = 0, i11642 = 0, i11643 = 0, i11644 = 0, i11645 = 0, i11646 = 0, i11647 = 0, i11648 = 0, i11649 = 0;
+        int i11650 = 0, i11651 = 0, i11652 = 0, i11653 = 0, i11654 = 0, i11655 = 0, i11656 = 0, i11657 = 0, i11658 = 0, i11659 = 0;
+        int i11660 = 0, i11661 = 0, i11662 = 0, i11663 = 0, i11664 = 0, i11665 = 0, i11666 = 0, i11667 = 0, i11668 = 0, i11669 = 0;
+        int i11670 = 0, i11671 = 0, i11672 = 0, i11673 = 0, i11674 = 0, i11675 = 0, i11676 = 0, i11677 = 0, i11678 = 0, i11679 = 0;
+        int i11680 = 0, i11681 = 0, i11682 = 0, i11683 = 0, i11684 = 0, i11685 = 0, i11686 = 0, i11687 = 0, i11688 = 0, i11689 = 0;
+        int i11690 = 0, i11691 = 0, i11692 = 0, i11693 = 0, i11694 = 0, i11695 = 0, i11696 = 0, i11697 = 0, i11698 = 0, i11699 = 0;
+        int i11700 = 0, i11701 = 0, i11702 = 0, i11703 = 0, i11704 = 0, i11705 = 0, i11706 = 0, i11707 = 0, i11708 = 0, i11709 = 0;
+        int i11710 = 0, i11711 = 0, i11712 = 0, i11713 = 0, i11714 = 0, i11715 = 0, i11716 = 0, i11717 = 0, i11718 = 0, i11719 = 0;
+        int i11720 = 0, i11721 = 0, i11722 = 0, i11723 = 0, i11724 = 0, i11725 = 0, i11726 = 0, i11727 = 0, i11728 = 0, i11729 = 0;
+        int i11730 = 0, i11731 = 0, i11732 = 0, i11733 = 0, i11734 = 0, i11735 = 0, i11736 = 0, i11737 = 0, i11738 = 0, i11739 = 0;
+        int i11740 = 0, i11741 = 0, i11742 = 0, i11743 = 0, i11744 = 0, i11745 = 0, i11746 = 0, i11747 = 0, i11748 = 0, i11749 = 0;
+        int i11750 = 0, i11751 = 0, i11752 = 0, i11753 = 0, i11754 = 0, i11755 = 0, i11756 = 0, i11757 = 0, i11758 = 0, i11759 = 0;
+        int i11760 = 0, i11761 = 0, i11762 = 0, i11763 = 0, i11764 = 0, i11765 = 0, i11766 = 0, i11767 = 0, i11768 = 0, i11769 = 0;
+        int i11770 = 0, i11771 = 0, i11772 = 0, i11773 = 0, i11774 = 0, i11775 = 0, i11776 = 0, i11777 = 0, i11778 = 0, i11779 = 0;
+        int i11780 = 0, i11781 = 0, i11782 = 0, i11783 = 0, i11784 = 0, i11785 = 0, i11786 = 0, i11787 = 0, i11788 = 0, i11789 = 0;
+        int i11790 = 0, i11791 = 0, i11792 = 0, i11793 = 0, i11794 = 0, i11795 = 0, i11796 = 0, i11797 = 0, i11798 = 0, i11799 = 0;
+        int i11800 = 0, i11801 = 0, i11802 = 0, i11803 = 0, i11804 = 0, i11805 = 0, i11806 = 0, i11807 = 0, i11808 = 0, i11809 = 0;
+        int i11810 = 0, i11811 = 0, i11812 = 0, i11813 = 0, i11814 = 0, i11815 = 0, i11816 = 0, i11817 = 0, i11818 = 0, i11819 = 0;
+        int i11820 = 0, i11821 = 0, i11822 = 0, i11823 = 0, i11824 = 0, i11825 = 0, i11826 = 0, i11827 = 0, i11828 = 0, i11829 = 0;
+        int i11830 = 0, i11831 = 0, i11832 = 0, i11833 = 0, i11834 = 0, i11835 = 0, i11836 = 0, i11837 = 0, i11838 = 0, i11839 = 0;
+        int i11840 = 0, i11841 = 0, i11842 = 0, i11843 = 0, i11844 = 0, i11845 = 0, i11846 = 0, i11847 = 0, i11848 = 0, i11849 = 0;
+        int i11850 = 0, i11851 = 0, i11852 = 0, i11853 = 0, i11854 = 0, i11855 = 0, i11856 = 0, i11857 = 0, i11858 = 0, i11859 = 0;
+        int i11860 = 0, i11861 = 0, i11862 = 0, i11863 = 0, i11864 = 0, i11865 = 0, i11866 = 0, i11867 = 0, i11868 = 0, i11869 = 0;
+        int i11870 = 0, i11871 = 0, i11872 = 0, i11873 = 0, i11874 = 0, i11875 = 0, i11876 = 0, i11877 = 0, i11878 = 0, i11879 = 0;
+        int i11880 = 0, i11881 = 0, i11882 = 0, i11883 = 0, i11884 = 0, i11885 = 0, i11886 = 0, i11887 = 0, i11888 = 0, i11889 = 0;
+        int i11890 = 0, i11891 = 0, i11892 = 0, i11893 = 0, i11894 = 0, i11895 = 0, i11896 = 0, i11897 = 0, i11898 = 0, i11899 = 0;
+        int i11900 = 0, i11901 = 0, i11902 = 0, i11903 = 0, i11904 = 0, i11905 = 0, i11906 = 0, i11907 = 0, i11908 = 0, i11909 = 0;
+        int i11910 = 0, i11911 = 0, i11912 = 0, i11913 = 0, i11914 = 0, i11915 = 0, i11916 = 0, i11917 = 0, i11918 = 0, i11919 = 0;
+        int i11920 = 0, i11921 = 0, i11922 = 0, i11923 = 0, i11924 = 0, i11925 = 0, i11926 = 0, i11927 = 0, i11928 = 0, i11929 = 0;
+        int i11930 = 0, i11931 = 0, i11932 = 0, i11933 = 0, i11934 = 0, i11935 = 0, i11936 = 0, i11937 = 0, i11938 = 0, i11939 = 0;
+        int i11940 = 0, i11941 = 0, i11942 = 0, i11943 = 0, i11944 = 0, i11945 = 0, i11946 = 0, i11947 = 0, i11948 = 0, i11949 = 0;
+        int i11950 = 0, i11951 = 0, i11952 = 0, i11953 = 0, i11954 = 0, i11955 = 0, i11956 = 0, i11957 = 0, i11958 = 0, i11959 = 0;
+        int i11960 = 0, i11961 = 0, i11962 = 0, i11963 = 0, i11964 = 0, i11965 = 0, i11966 = 0, i11967 = 0, i11968 = 0, i11969 = 0;
+        int i11970 = 0, i11971 = 0, i11972 = 0, i11973 = 0, i11974 = 0, i11975 = 0, i11976 = 0, i11977 = 0, i11978 = 0, i11979 = 0;
+        int i11980 = 0, i11981 = 0, i11982 = 0, i11983 = 0, i11984 = 0, i11985 = 0, i11986 = 0, i11987 = 0, i11988 = 0, i11989 = 0;
+        int i11990 = 0, i11991 = 0, i11992 = 0, i11993 = 0, i11994 = 0, i11995 = 0, i11996 = 0, i11997 = 0, i11998 = 0, i11999 = 0;
+        int i12000 = 0, i12001 = 0, i12002 = 0, i12003 = 0, i12004 = 0, i12005 = 0, i12006 = 0, i12007 = 0, i12008 = 0, i12009 = 0;
+        int i12010 = 0, i12011 = 0, i12012 = 0, i12013 = 0, i12014 = 0, i12015 = 0, i12016 = 0, i12017 = 0, i12018 = 0, i12019 = 0;
+        int i12020 = 0, i12021 = 0, i12022 = 0, i12023 = 0, i12024 = 0, i12025 = 0, i12026 = 0, i12027 = 0, i12028 = 0, i12029 = 0;
+        int i12030 = 0, i12031 = 0, i12032 = 0, i12033 = 0, i12034 = 0, i12035 = 0, i12036 = 0, i12037 = 0, i12038 = 0, i12039 = 0;
+        int i12040 = 0, i12041 = 0, i12042 = 0, i12043 = 0, i12044 = 0, i12045 = 0, i12046 = 0, i12047 = 0, i12048 = 0, i12049 = 0;
+        int i12050 = 0, i12051 = 0, i12052 = 0, i12053 = 0, i12054 = 0, i12055 = 0, i12056 = 0, i12057 = 0, i12058 = 0, i12059 = 0;
+        int i12060 = 0, i12061 = 0, i12062 = 0, i12063 = 0, i12064 = 0, i12065 = 0, i12066 = 0, i12067 = 0, i12068 = 0, i12069 = 0;
+        int i12070 = 0, i12071 = 0, i12072 = 0, i12073 = 0, i12074 = 0, i12075 = 0, i12076 = 0, i12077 = 0, i12078 = 0, i12079 = 0;
+        int i12080 = 0, i12081 = 0, i12082 = 0, i12083 = 0, i12084 = 0, i12085 = 0, i12086 = 0, i12087 = 0, i12088 = 0, i12089 = 0;
+        int i12090 = 0, i12091 = 0, i12092 = 0, i12093 = 0, i12094 = 0, i12095 = 0, i12096 = 0, i12097 = 0, i12098 = 0, i12099 = 0;
+        int i12100 = 0, i12101 = 0, i12102 = 0, i12103 = 0, i12104 = 0, i12105 = 0, i12106 = 0, i12107 = 0, i12108 = 0, i12109 = 0;
+        int i12110 = 0, i12111 = 0, i12112 = 0, i12113 = 0, i12114 = 0, i12115 = 0, i12116 = 0, i12117 = 0, i12118 = 0, i12119 = 0;
+        int i12120 = 0, i12121 = 0, i12122 = 0, i12123 = 0, i12124 = 0, i12125 = 0, i12126 = 0, i12127 = 0, i12128 = 0, i12129 = 0;
+        int i12130 = 0, i12131 = 0, i12132 = 0, i12133 = 0, i12134 = 0, i12135 = 0, i12136 = 0, i12137 = 0, i12138 = 0, i12139 = 0;
+        int i12140 = 0, i12141 = 0, i12142 = 0, i12143 = 0, i12144 = 0, i12145 = 0, i12146 = 0, i12147 = 0, i12148 = 0, i12149 = 0;
+        int i12150 = 0, i12151 = 0, i12152 = 0, i12153 = 0, i12154 = 0, i12155 = 0, i12156 = 0, i12157 = 0, i12158 = 0, i12159 = 0;
+        int i12160 = 0, i12161 = 0, i12162 = 0, i12163 = 0, i12164 = 0, i12165 = 0, i12166 = 0, i12167 = 0, i12168 = 0, i12169 = 0;
+        int i12170 = 0, i12171 = 0, i12172 = 0, i12173 = 0, i12174 = 0, i12175 = 0, i12176 = 0, i12177 = 0, i12178 = 0, i12179 = 0;
+        int i12180 = 0, i12181 = 0, i12182 = 0, i12183 = 0, i12184 = 0, i12185 = 0, i12186 = 0, i12187 = 0, i12188 = 0, i12189 = 0;
+        int i12190 = 0, i12191 = 0, i12192 = 0, i12193 = 0, i12194 = 0, i12195 = 0, i12196 = 0, i12197 = 0, i12198 = 0, i12199 = 0;
+        int i12200 = 0, i12201 = 0, i12202 = 0, i12203 = 0, i12204 = 0, i12205 = 0, i12206 = 0, i12207 = 0, i12208 = 0, i12209 = 0;
+        int i12210 = 0, i12211 = 0, i12212 = 0, i12213 = 0, i12214 = 0, i12215 = 0, i12216 = 0, i12217 = 0, i12218 = 0, i12219 = 0;
+        int i12220 = 0, i12221 = 0, i12222 = 0, i12223 = 0, i12224 = 0, i12225 = 0, i12226 = 0, i12227 = 0, i12228 = 0, i12229 = 0;
+        int i12230 = 0, i12231 = 0, i12232 = 0, i12233 = 0, i12234 = 0, i12235 = 0, i12236 = 0, i12237 = 0, i12238 = 0, i12239 = 0;
+        int i12240 = 0, i12241 = 0, i12242 = 0, i12243 = 0, i12244 = 0, i12245 = 0, i12246 = 0, i12247 = 0, i12248 = 0, i12249 = 0;
+        int i12250 = 0, i12251 = 0, i12252 = 0, i12253 = 0, i12254 = 0, i12255 = 0, i12256 = 0, i12257 = 0, i12258 = 0, i12259 = 0;
+        int i12260 = 0, i12261 = 0, i12262 = 0, i12263 = 0, i12264 = 0, i12265 = 0, i12266 = 0, i12267 = 0, i12268 = 0, i12269 = 0;
+        int i12270 = 0, i12271 = 0, i12272 = 0, i12273 = 0, i12274 = 0, i12275 = 0, i12276 = 0, i12277 = 0, i12278 = 0, i12279 = 0;
+        int i12280 = 0, i12281 = 0, i12282 = 0, i12283 = 0, i12284 = 0, i12285 = 0, i12286 = 0, i12287 = 0, i12288 = 0, i12289 = 0;
+        int i12290 = 0, i12291 = 0, i12292 = 0, i12293 = 0, i12294 = 0, i12295 = 0, i12296 = 0, i12297 = 0, i12298 = 0, i12299 = 0;
+        int i12300 = 0, i12301 = 0, i12302 = 0, i12303 = 0, i12304 = 0, i12305 = 0, i12306 = 0, i12307 = 0, i12308 = 0, i12309 = 0;
+        int i12310 = 0, i12311 = 0, i12312 = 0, i12313 = 0, i12314 = 0, i12315 = 0, i12316 = 0, i12317 = 0, i12318 = 0, i12319 = 0;
+        int i12320 = 0, i12321 = 0, i12322 = 0, i12323 = 0, i12324 = 0, i12325 = 0, i12326 = 0, i12327 = 0, i12328 = 0, i12329 = 0;
+        int i12330 = 0, i12331 = 0, i12332 = 0, i12333 = 0, i12334 = 0, i12335 = 0, i12336 = 0, i12337 = 0, i12338 = 0, i12339 = 0;
+        int i12340 = 0, i12341 = 0, i12342 = 0, i12343 = 0, i12344 = 0, i12345 = 0, i12346 = 0, i12347 = 0, i12348 = 0, i12349 = 0;
+        int i12350 = 0, i12351 = 0, i12352 = 0, i12353 = 0, i12354 = 0, i12355 = 0, i12356 = 0, i12357 = 0, i12358 = 0, i12359 = 0;
+        int i12360 = 0, i12361 = 0, i12362 = 0, i12363 = 0, i12364 = 0, i12365 = 0, i12366 = 0, i12367 = 0, i12368 = 0, i12369 = 0;
+        int i12370 = 0, i12371 = 0, i12372 = 0, i12373 = 0, i12374 = 0, i12375 = 0, i12376 = 0, i12377 = 0, i12378 = 0, i12379 = 0;
+        int i12380 = 0, i12381 = 0, i12382 = 0, i12383 = 0, i12384 = 0, i12385 = 0, i12386 = 0, i12387 = 0, i12388 = 0, i12389 = 0;
+        int i12390 = 0, i12391 = 0, i12392 = 0, i12393 = 0, i12394 = 0, i12395 = 0, i12396 = 0, i12397 = 0, i12398 = 0, i12399 = 0;
+        int i12400 = 0, i12401 = 0, i12402 = 0, i12403 = 0, i12404 = 0, i12405 = 0, i12406 = 0, i12407 = 0, i12408 = 0, i12409 = 0;
+        int i12410 = 0, i12411 = 0, i12412 = 0, i12413 = 0, i12414 = 0, i12415 = 0, i12416 = 0, i12417 = 0, i12418 = 0, i12419 = 0;
+        int i12420 = 0, i12421 = 0, i12422 = 0, i12423 = 0, i12424 = 0, i12425 = 0, i12426 = 0, i12427 = 0, i12428 = 0, i12429 = 0;
+        int i12430 = 0, i12431 = 0, i12432 = 0, i12433 = 0, i12434 = 0, i12435 = 0, i12436 = 0, i12437 = 0, i12438 = 0, i12439 = 0;
+        int i12440 = 0, i12441 = 0, i12442 = 0, i12443 = 0, i12444 = 0, i12445 = 0, i12446 = 0, i12447 = 0, i12448 = 0, i12449 = 0;
+        int i12450 = 0, i12451 = 0, i12452 = 0, i12453 = 0, i12454 = 0, i12455 = 0, i12456 = 0, i12457 = 0, i12458 = 0, i12459 = 0;
+        int i12460 = 0, i12461 = 0, i12462 = 0, i12463 = 0, i12464 = 0, i12465 = 0, i12466 = 0, i12467 = 0, i12468 = 0, i12469 = 0;
+        int i12470 = 0, i12471 = 0, i12472 = 0, i12473 = 0, i12474 = 0, i12475 = 0, i12476 = 0, i12477 = 0, i12478 = 0, i12479 = 0;
+        int i12480 = 0, i12481 = 0, i12482 = 0, i12483 = 0, i12484 = 0, i12485 = 0, i12486 = 0, i12487 = 0, i12488 = 0, i12489 = 0;
+        int i12490 = 0, i12491 = 0, i12492 = 0, i12493 = 0, i12494 = 0, i12495 = 0, i12496 = 0, i12497 = 0, i12498 = 0, i12499 = 0;
+        int i12500 = 0, i12501 = 0, i12502 = 0, i12503 = 0, i12504 = 0, i12505 = 0, i12506 = 0, i12507 = 0, i12508 = 0, i12509 = 0;
+        int i12510 = 0, i12511 = 0, i12512 = 0, i12513 = 0, i12514 = 0, i12515 = 0, i12516 = 0, i12517 = 0, i12518 = 0, i12519 = 0;
+        int i12520 = 0, i12521 = 0, i12522 = 0, i12523 = 0, i12524 = 0, i12525 = 0, i12526 = 0, i12527 = 0, i12528 = 0, i12529 = 0;
+        int i12530 = 0, i12531 = 0, i12532 = 0, i12533 = 0, i12534 = 0, i12535 = 0, i12536 = 0, i12537 = 0, i12538 = 0, i12539 = 0;
+        int i12540 = 0, i12541 = 0, i12542 = 0, i12543 = 0, i12544 = 0, i12545 = 0, i12546 = 0, i12547 = 0, i12548 = 0, i12549 = 0;
+        int i12550 = 0, i12551 = 0, i12552 = 0, i12553 = 0, i12554 = 0, i12555 = 0, i12556 = 0, i12557 = 0, i12558 = 0, i12559 = 0;
+        int i12560 = 0, i12561 = 0, i12562 = 0, i12563 = 0, i12564 = 0, i12565 = 0, i12566 = 0, i12567 = 0, i12568 = 0, i12569 = 0;
+        int i12570 = 0, i12571 = 0, i12572 = 0, i12573 = 0, i12574 = 0, i12575 = 0, i12576 = 0, i12577 = 0, i12578 = 0, i12579 = 0;
+        int i12580 = 0, i12581 = 0, i12582 = 0, i12583 = 0, i12584 = 0, i12585 = 0, i12586 = 0, i12587 = 0, i12588 = 0, i12589 = 0;
+        int i12590 = 0, i12591 = 0, i12592 = 0, i12593 = 0, i12594 = 0, i12595 = 0, i12596 = 0, i12597 = 0, i12598 = 0, i12599 = 0;
+        int i12600 = 0, i12601 = 0, i12602 = 0, i12603 = 0, i12604 = 0, i12605 = 0, i12606 = 0, i12607 = 0, i12608 = 0, i12609 = 0;
+        int i12610 = 0, i12611 = 0, i12612 = 0, i12613 = 0, i12614 = 0, i12615 = 0, i12616 = 0, i12617 = 0, i12618 = 0, i12619 = 0;
+        int i12620 = 0, i12621 = 0, i12622 = 0, i12623 = 0, i12624 = 0, i12625 = 0, i12626 = 0, i12627 = 0, i12628 = 0, i12629 = 0;
+        int i12630 = 0, i12631 = 0, i12632 = 0, i12633 = 0, i12634 = 0, i12635 = 0, i12636 = 0, i12637 = 0, i12638 = 0, i12639 = 0;
+        int i12640 = 0, i12641 = 0, i12642 = 0, i12643 = 0, i12644 = 0, i12645 = 0, i12646 = 0, i12647 = 0, i12648 = 0, i12649 = 0;
+        int i12650 = 0, i12651 = 0, i12652 = 0, i12653 = 0, i12654 = 0, i12655 = 0, i12656 = 0, i12657 = 0, i12658 = 0, i12659 = 0;
+        int i12660 = 0, i12661 = 0, i12662 = 0, i12663 = 0, i12664 = 0, i12665 = 0, i12666 = 0, i12667 = 0, i12668 = 0, i12669 = 0;
+        int i12670 = 0, i12671 = 0, i12672 = 0, i12673 = 0, i12674 = 0, i12675 = 0, i12676 = 0, i12677 = 0, i12678 = 0, i12679 = 0;
+        int i12680 = 0, i12681 = 0, i12682 = 0, i12683 = 0, i12684 = 0, i12685 = 0, i12686 = 0, i12687 = 0, i12688 = 0, i12689 = 0;
+        int i12690 = 0, i12691 = 0, i12692 = 0, i12693 = 0, i12694 = 0, i12695 = 0, i12696 = 0, i12697 = 0, i12698 = 0, i12699 = 0;
+        int i12700 = 0, i12701 = 0, i12702 = 0, i12703 = 0, i12704 = 0, i12705 = 0, i12706 = 0, i12707 = 0, i12708 = 0, i12709 = 0;
+        int i12710 = 0, i12711 = 0, i12712 = 0, i12713 = 0, i12714 = 0, i12715 = 0, i12716 = 0, i12717 = 0, i12718 = 0, i12719 = 0;
+        int i12720 = 0, i12721 = 0, i12722 = 0, i12723 = 0, i12724 = 0, i12725 = 0, i12726 = 0, i12727 = 0, i12728 = 0, i12729 = 0;
+        int i12730 = 0, i12731 = 0, i12732 = 0, i12733 = 0, i12734 = 0, i12735 = 0, i12736 = 0, i12737 = 0, i12738 = 0, i12739 = 0;
+        int i12740 = 0, i12741 = 0, i12742 = 0, i12743 = 0, i12744 = 0, i12745 = 0, i12746 = 0, i12747 = 0, i12748 = 0, i12749 = 0;
+        int i12750 = 0, i12751 = 0, i12752 = 0, i12753 = 0, i12754 = 0, i12755 = 0, i12756 = 0, i12757 = 0, i12758 = 0, i12759 = 0;
+        int i12760 = 0, i12761 = 0, i12762 = 0, i12763 = 0, i12764 = 0, i12765 = 0, i12766 = 0, i12767 = 0, i12768 = 0, i12769 = 0;
+        int i12770 = 0, i12771 = 0, i12772 = 0, i12773 = 0, i12774 = 0, i12775 = 0, i12776 = 0, i12777 = 0, i12778 = 0, i12779 = 0;
+        int i12780 = 0, i12781 = 0, i12782 = 0, i12783 = 0, i12784 = 0, i12785 = 0, i12786 = 0, i12787 = 0, i12788 = 0, i12789 = 0;
+        int i12790 = 0, i12791 = 0, i12792 = 0, i12793 = 0, i12794 = 0, i12795 = 0, i12796 = 0, i12797 = 0, i12798 = 0, i12799 = 0;
+        int i12800 = 0, i12801 = 0, i12802 = 0, i12803 = 0, i12804 = 0, i12805 = 0, i12806 = 0, i12807 = 0, i12808 = 0, i12809 = 0;
+        int i12810 = 0, i12811 = 0, i12812 = 0, i12813 = 0, i12814 = 0, i12815 = 0, i12816 = 0, i12817 = 0, i12818 = 0, i12819 = 0;
+        int i12820 = 0, i12821 = 0, i12822 = 0, i12823 = 0, i12824 = 0, i12825 = 0, i12826 = 0, i12827 = 0, i12828 = 0, i12829 = 0;
+        int i12830 = 0, i12831 = 0, i12832 = 0, i12833 = 0, i12834 = 0, i12835 = 0, i12836 = 0, i12837 = 0, i12838 = 0, i12839 = 0;
+        int i12840 = 0, i12841 = 0, i12842 = 0, i12843 = 0, i12844 = 0, i12845 = 0, i12846 = 0, i12847 = 0, i12848 = 0, i12849 = 0;
+        int i12850 = 0, i12851 = 0, i12852 = 0, i12853 = 0, i12854 = 0, i12855 = 0, i12856 = 0, i12857 = 0, i12858 = 0, i12859 = 0;
+        int i12860 = 0, i12861 = 0, i12862 = 0, i12863 = 0, i12864 = 0, i12865 = 0, i12866 = 0, i12867 = 0, i12868 = 0, i12869 = 0;
+        int i12870 = 0, i12871 = 0, i12872 = 0, i12873 = 0, i12874 = 0, i12875 = 0, i12876 = 0, i12877 = 0, i12878 = 0, i12879 = 0;
+        int i12880 = 0, i12881 = 0, i12882 = 0, i12883 = 0, i12884 = 0, i12885 = 0, i12886 = 0, i12887 = 0, i12888 = 0, i12889 = 0;
+        int i12890 = 0, i12891 = 0, i12892 = 0, i12893 = 0, i12894 = 0, i12895 = 0, i12896 = 0, i12897 = 0, i12898 = 0, i12899 = 0;
+        int i12900 = 0, i12901 = 0, i12902 = 0, i12903 = 0, i12904 = 0, i12905 = 0, i12906 = 0, i12907 = 0, i12908 = 0, i12909 = 0;
+        int i12910 = 0, i12911 = 0, i12912 = 0, i12913 = 0, i12914 = 0, i12915 = 0, i12916 = 0, i12917 = 0, i12918 = 0, i12919 = 0;
+        int i12920 = 0, i12921 = 0, i12922 = 0, i12923 = 0, i12924 = 0, i12925 = 0, i12926 = 0, i12927 = 0, i12928 = 0, i12929 = 0;
+        int i12930 = 0, i12931 = 0, i12932 = 0, i12933 = 0, i12934 = 0, i12935 = 0, i12936 = 0, i12937 = 0, i12938 = 0, i12939 = 0;
+        int i12940 = 0, i12941 = 0, i12942 = 0, i12943 = 0, i12944 = 0, i12945 = 0, i12946 = 0, i12947 = 0, i12948 = 0, i12949 = 0;
+        int i12950 = 0, i12951 = 0, i12952 = 0, i12953 = 0, i12954 = 0, i12955 = 0, i12956 = 0, i12957 = 0, i12958 = 0, i12959 = 0;
+        int i12960 = 0, i12961 = 0, i12962 = 0, i12963 = 0, i12964 = 0, i12965 = 0, i12966 = 0, i12967 = 0, i12968 = 0, i12969 = 0;
+        int i12970 = 0, i12971 = 0, i12972 = 0, i12973 = 0, i12974 = 0, i12975 = 0, i12976 = 0, i12977 = 0, i12978 = 0, i12979 = 0;
+        int i12980 = 0, i12981 = 0, i12982 = 0, i12983 = 0, i12984 = 0, i12985 = 0, i12986 = 0, i12987 = 0, i12988 = 0, i12989 = 0;
+        int i12990 = 0, i12991 = 0, i12992 = 0, i12993 = 0, i12994 = 0, i12995 = 0, i12996 = 0, i12997 = 0, i12998 = 0, i12999 = 0;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7116216/StackOverflow.java	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7116216
+ * @summary The vm crashes when GC happens during throwing a StackOverflow exception
+ *
+ * @run main/othervm -Xcomp -Xbatch StackOverflow
+ */
+
+public class StackOverflow {
+    static String stackOverflow_largeFrame_liveOopForGC;
+
+    public static int stackOverflow_largeFrame(int call_count, String liveOopForGC) {
+        try {
+            int return_count = stackOverflow_largeFrame(++call_count, liveOopForGC);
+            if (return_count == 0) {
+                try {
+                    LargeFrame.method_with_many_locals(liveOopForGC, 2,3,4,5,6,7,liveOopForGC);
+                } catch (StackOverflowError e2) {
+                    // access liveOopForGC to make it a live variable
+                    stackOverflow_largeFrame_liveOopForGC = liveOopForGC;
+                }
+            }
+            return return_count - 1;
+        } catch (StackOverflowError e) {
+            // Return a value that is large enough such that no unrecoverable
+            // stack overflow will occur afterwards, but that is small enough
+            // such that calling LargeFrame.method_with_many_locals() will
+            // cause a StackOverflowError.
+            // Don't use a call here because we're out of stack space anyway!
+            int tmp = call_count / 2;
+            return (tmp < 100 ? tmp : 100);
+        }
+    }
+    public static void main(String args[]) {
+        LargeFrame.method_with_many_locals(new Object(), 2,3,4,5,6,7,new Object());
+
+        stackOverflow_largeFrame(0, "this is a live oop to test GC");
+        System.out.println("finished ok!");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7123108/Test7123108.java	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7123108
+ * @summary C1 crashes with assert(if_state != NULL) failed: states do not match up
+ *
+ * @run main/othervm -Xcomp Test7123108
+ */
+
+public class Test7123108 {
+
+    static class Test_Class_0 {
+        final static byte var_2 = 67;
+        byte var_3;
+    }
+
+    Object var_25 = "kgfpyhcms";
+    static long var_27 = 6899666748616086528L;
+
+    static float func_1()
+    {
+        return 0.0F;
+    }
+
+    private void test()
+    {
+        "dlwq".charAt(((short)'x' > var_27 | func_1() <= (((Test_Class_0)var_25).var_3) ? true : true) ? Test_Class_0.var_2 & (short)-1.1173839E38F : 'Y');
+    }
+
+    public static void main(String[] args)
+    {
+        Test7123108 t = new Test7123108();
+        try {
+            t.test();
+        } catch (Throwable e) { }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7125879/Test7125879.java	Wed Jul 05 18:00:12 2017 +0200
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7125879
+ * @summary assert(proj != NULL) failed: must be found
+ *
+ * @run main/othervm -Xcomp Test7125879
+ */
+
+public class Test7125879 {
+    String var_1 = "abc";
+
+    public Test7125879() {
+        var_1 = var_1.replaceAll("d", "e") + var_1;
+    }
+
+    public static void main(String[] args) {
+        Test7125879 t = new Test7125879();
+        try {
+            t.test();
+        } catch(Throwable e) { }
+    }
+
+    private void test() {
+        new Test7125879().var_1 = ((Test7125879)(new Object[-1])[0]).var_1;
+    }
+}
+
--- a/jaxp/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/jaxp/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 ebec6a7e8d4e481d205a2109ddeea8121d76db7f jdk8-b18
 dffeb62b1a7fc8b316bf58fe5479323f3661894e jdk8-b19
 f052abb8f37444ba77858913887d0d92795dd6b8 jdk8-b20
+d41eeadf5c1344b88c5051a997aec9e1ad7ce1db jdk8-b21
--- a/jaxws/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/jaxws/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 54928c8850f5498670dd43d6c9299f36de1a6746 jdk8-b18
 b73b733214aa43648d69a2da51e6b48fda902a2d jdk8-b19
 2b2818e3386f4510c390f6aea90d77e1c6a5bf9e jdk8-b20
+c266cab0e3fff05f2048c23046c14d60f7102175 jdk8-b21
--- a/jdk/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/jdk/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 334bd51fb3f321cd6777416ae7bafac71a84140a jdk8-b18
 3778f85773055e81eab6c5ef828935ecca241810 jdk8-b19
 39e938cd1b82ec3aab0a9aa66fd8a0457cd0c9c2 jdk8-b20
+664fa4fb0ee411ef048903c479f8b962fcdb2f4b jdk8-b21
--- a/langtools/.hgtags	Wed Jul 05 17:59:32 2017 +0200
+++ b/langtools/.hgtags	Wed Jul 05 18:00:12 2017 +0200
@@ -142,3 +142,4 @@
 ab1b1cc7857716914f2bb20b3128e5a8978290f7 jdk8-b18
 77b2c066084cbc75150efc6603a713c558329813 jdk8-b19
 ffd294128a48cbb90ce8f0569f82b61f1f164a18 jdk8-b20
+bcb21abf1c4177baf4574f99709513dcd4474727 jdk8-b21