Merge
author jwilhelm
Tue, 10 Oct 2017 16:29:04 +0200
changeset 47612 b512c5781ca1
parent 47316 1129253d3bc7 (current diff)
parent 47611 7eda0a64c199 (diff)
child 47613 af241e3e5a13
Merge
make/common/Modules.gmk
make/hotspot/lib/JvmFeatures.gmk
make/test/JtregNativeHotspot.gmk
src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
--- a/make/common/Modules.gmk	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/common/Modules.gmk	Tue Oct 10 16:29:04 2017 +0200
@@ -113,6 +113,7 @@
     jdk.dynalink \
     jdk.httpserver \
     jdk.incubator.httpclient \
+    jdk.internal.vm.compiler.management \
     jdk.jsobject \
     jdk.localedata \
     jdk.naming.dns \
@@ -215,6 +216,7 @@
 
 ifeq ($(INCLUDE_GRAAL), false)
   MODULES_FILTER += jdk.internal.vm.compiler
+  MODULES_FILTER += jdk.internal.vm.compiler.management
 endif
 
 ################################################################################
--- a/make/conf/jib-profiles.js	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/conf/jib-profiles.js	Tue Oct 10 16:29:04 2017 +0200
@@ -1063,7 +1063,7 @@
         jtreg: {
             server: "javare",
             revision: "4.2",
-            build_number: "b08",
+            build_number: "b09",
             checksum_file: "MD5_VALUES",
             file: "jtreg_bin-4.2.zip",
             environment_name: "JT_HOME",
--- a/make/gensrc/GensrcModuleLoaderMap.gmk	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/gensrc/GensrcModuleLoaderMap.gmk	Tue Oct 10 16:29:04 2017 +0200
@@ -54,15 +54,4 @@
 
 GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java
 
-$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
-    $(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
-    $(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
-	$(MKDIR) -p $(@D)
-	$(RM) $@ $@.tmp
-	$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
-	    -platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
-	$(MV) $@.tmp $@
-
-GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
-
 ################################################################################
--- a/make/hotspot/lib/JvmFeatures.gmk	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/hotspot/lib/JvmFeatures.gmk	Tue Oct 10 16:29:04 2017 +0200
@@ -47,6 +47,9 @@
 ifeq ($(call check-jvm-feature, zero), true)
   JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
   JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
+  ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
+    BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
+  endif
 endif
 
 ifeq ($(call check-jvm-feature, shark), true)
--- a/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java	Tue Oct 10 16:29:04 2017 +0200
@@ -77,30 +77,22 @@
             throw new IllegalArgumentException(source + " not exist");
         }
 
-        boolean needsQuotes = outfile.toString().contains(".java.tmp");
-
         try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
              PrintWriter writer = new PrintWriter(bw)) {
             for (String line : Files.readAllLines(source)) {
                 if (line.contains("@@BOOT_MODULE_NAMES@@")) {
-                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
+                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
                 } else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
-                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
+                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
                 }
                 writer.println(line);
             }
         }
     }
 
-    private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
-        String mns = null;
-        if (needsQuotes) {
-            mns = stream.sorted()
-                .collect(Collectors.joining("\",\n            \""));
-        } else {
-            mns = stream.sorted()
-                .collect(Collectors.joining("\n"));
-        }
+    private static String patch(String s, String tag, Stream<String> stream) {
+        String mns = stream.sorted()
+            .collect(Collectors.joining("\",\n            \""));
         return s.replace(tag, mns);
     }
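
With the ModuleLoaderMap.dat rule deleted from GensrcModuleLoaderMap.gmk above, patch() only ever expands the Java template, so the quoted join is now unconditional. A minimal, self-contained sketch of what that join produces (module names here are illustrative):

import java.util.stream.Collectors;
import java.util.stream.Stream;

public class PatchJoinDemo {
    public static void main(String[] args) {
        // Same joining as GenModuleLoaderMap.patch(): sort the names and
        // separate them with '",' + newline + indentation + '"' so they can
        // be dropped into a quoted list in the generated Java source.
        String mns = Stream.of("jdk.httpserver", "java.base", "jdk.jsobject")
            .sorted()
            .collect(Collectors.joining("\",\n            \""));
        System.out.println("\"" + mns + "\"");
    }
}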
 
--- a/make/test/JtregNativeHotspot.gmk	Thu Oct 05 18:29:47 2017 +0100
+++ b/make/test/JtregNativeHotspot.gmk	Tue Oct 10 16:29:04 2017 +0200
@@ -59,6 +59,7 @@
     $(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
     $(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
     $(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
+    $(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
     $(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
     $(TOPDIR)/test/hotspot/jtreg/compiler/calls \
     $(TOPDIR)/test/hotspot/jtreg/serviceability/jvmti/GetOwnedMonitorInfo \
@@ -103,6 +104,7 @@
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAClassLoadPrepare := -lc
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
     BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
+    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
 endif
 
 ifeq ($(OPENJDK_TARGET_OS), linux)
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2840,6 +2840,44 @@
   bind(L_done);
 }
 
+// Code for BigInteger::mulAdd intrinsic
+// out     = r0
+// in      = r1
+// offset  = r2  (already out.length-offset)
+// len     = r3
+// k       = r4
+//
+// pseudo code from java implementation:
+// carry = 0;
+// offset = out.length-offset - 1;
+// for (int j=len-1; j >= 0; j--) {
+//     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
+//     out[offset--] = (int)product;
+//     carry = product >>> 32;
+// }
+// return (int)carry;
+void MacroAssembler::mul_add(Register out, Register in, Register offset,
+      Register len, Register k) {
+    Label LOOP, END;
+    // pre-loop
+    cmp(len, zr); // cmp, not cbz/cbnz: to use the condition twice => fewer branches
+    csel(out, zr, out, Assembler::EQ);
+    br(Assembler::EQ, END);
+    add(in, in, len, LSL, 2); // in[j+1] address
+    add(offset, out, offset, LSL, 2); // out[offset + 1] address
+    mov(out, zr); // used to keep carry now
+    BIND(LOOP);
+    ldrw(rscratch1, Address(pre(in, -4)));
+    madd(rscratch1, rscratch1, k, out);
+    ldrw(rscratch2, Address(pre(offset, -4)));
+    add(rscratch1, rscratch1, rscratch2);
+    strw(rscratch1, Address(offset));
+    lsr(out, rscratch1, 32);
+    subs(len, len, 1);
+    br(Assembler::NE, LOOP);
+    BIND(END);
+}
+
 /**
  * Emits code to update CRC-32 with a byte value according to constants in table
  *
@@ -3291,6 +3329,7 @@
   ldr(dst, Address(dst, ConstMethod::constants_offset()));
   ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
   ldr(dst, Address(dst, mirror_offset));
+  resolve_oop_handle(dst);
 }
 
 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
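
The mul_add stub above is the aarch64 back end for the BigInteger mulAdd intrinsic. A runnable scalar Java sketch of the loop it replaces, following the pseudo code in the comment (class name and test values here are illustrative, not the JDK's own code):

public class MulAddSketch {
    private static final long LONG_MASK = 0xffffffffL;

    // Multiply each word of 'in' by k, add the product into 'out' starting
    // at word out.length - offset - 1, propagate the carry through the
    // 32-bit words, and return the final carry.
    static int mulAdd(int[] out, int[] in, int offset, int len, int k) {
        long kLong = k & LONG_MASK;
        long carry = 0;
        offset = out.length - offset - 1;
        for (int j = len - 1; j >= 0; j--) {
            long product = (in[j] & LONG_MASK) * kLong
                         + (out[offset] & LONG_MASK) + carry;
            out[offset--] = (int) product;
            carry = product >>> 32;
        }
        return (int) carry;
    }

    public static void main(String[] args) {
        int[] out = {0, 0, 0xffffffff};
        int[] in  = {2};
        // 2 * 3 added into out[2] (0xffffffff) overflows into a carry of 1.
        int carry = mulAdd(out, in, 0, 1, 3);
        System.out.println(carry + " " + java.util.Arrays.toString(out));
    }
}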
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1265,6 +1265,7 @@
   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                        Register zlen, Register tmp1, Register tmp2, Register tmp3,
                        Register tmp4, Register tmp5, Register tmp6, Register tmp7);
+  void mul_add(Register out, Register in, Register offs, Register len, Register k);
   // ISB may be needed because of a safepoint
   void maybe_isb() { isb(); }
 
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -3607,6 +3607,63 @@
     return start;
   }
 
+  address generate_squareToLen() {
+    // The squareToLen algorithm for sizes 1..127 described in the Java code is
+    // faster than multiply_to_len on some CPUs and slower on others, but
+    // multiply_to_len shows slightly better overall results.
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "squareToLen");
+    address start = __ pc();
+
+    const Register x     = r0;
+    const Register xlen  = r1;
+    const Register z     = r2;
+    const Register zlen  = r3;
+    const Register y     = r4; // == x
+    const Register ylen  = r5; // == xlen
+
+    const Register tmp1  = r10;
+    const Register tmp2  = r11;
+    const Register tmp3  = r12;
+    const Register tmp4  = r13;
+    const Register tmp5  = r14;
+    const Register tmp6  = r15;
+    const Register tmp7  = r16;
+
+    RegSet spilled_regs = RegSet::of(y, ylen);
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+    __ push(spilled_regs, sp);
+    __ mov(y, x);
+    __ mov(ylen, xlen);
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+    __ pop(spilled_regs, sp);
+    __ leave();
+    __ ret(lr);
+    return start;
+  }
+
+  address generate_mulAdd() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+    address start = __ pc();
+
+    const Register out     = r0;
+    const Register in      = r1;
+    const Register offset  = r2;
+    const Register len     = r3;
+    const Register k       = r4;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+    __ mul_add(out, in, offset, len, k);
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
   void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                       FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                       FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@@ -4913,6 +4970,14 @@
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
 
+    if (UseSquareToLenIntrinsic) {
+      StubRoutines::_squareToLen = generate_squareToLen();
+    }
+
+    if (UseMulAddIntrinsic) {
+      StubRoutines::_mulAdd = generate_mulAdd();
+    }
+
     if (UseMontgomeryMultiplyIntrinsic) {
       StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
       MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
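
Note that generate_squareToLen above emits no dedicated squaring loop: it spills y/ylen, aliases them to x/xlen, and reuses multiply_to_len, trading the specialized squareToLen loop in the Java code for the generally faster multiplication path. The identity it relies on is simply square(x) == multiply(x, x); a trivial Java check (illustrative only):

import java.math.BigInteger;

public class SquareViaMultiply {
    public static void main(String[] args) {
        // The stub computes x * x with the general multiplication routine
        // instead of a dedicated squaring algorithm.
        BigInteger x = new BigInteger("123456789012345678901234567890");
        System.out.println(x.multiply(x).equals(x.pow(2))); // prints true
    }
}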
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2297,6 +2297,7 @@
                                         ConstantPoolCacheEntry::f1_offset())));
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ ldr(obj, Address(obj, mirror_offset));
+    __ resolve_oop_handle(obj);
   }
 }
 
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -340,6 +340,14 @@
     UseMultiplyToLenIntrinsic = true;
   }
 
+  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+    UseSquareToLenIntrinsic = true;
+  }
+
+  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+    UseMulAddIntrinsic = true;
+  }
+
   if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
     UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
   }
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2899,6 +2899,7 @@
   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
   ldr(mirror, Address(tmp, mirror_offset));
+  resolve_oop_handle(mirror);
 }
 
 
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2963,6 +2963,7 @@
              cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ ldr(Robj, Address(Robj, mirror_offset));
+    __ resolve_oop_handle(Robj);
   }
 }
 
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -517,6 +517,9 @@
     XXPERMDI_OPCODE= (60u << OPCODE_SHIFT |   10u << 3),
     XXMRGHW_OPCODE = (60u << OPCODE_SHIFT |   18u << 3),
     XXMRGLW_OPCODE = (60u << OPCODE_SHIFT |   50u << 3),
+    XXSPLTW_OPCODE = (60u << OPCODE_SHIFT |  164u << 2),
+    XXLXOR_OPCODE  = (60u << OPCODE_SHIFT |  154u << 3),
+    XXLEQV_OPCODE  = (60u << OPCODE_SHIFT |  186u << 3),
 
     // Vector Permute and Formatting
     VPKPX_OPCODE   = (4u  << OPCODE_SHIFT |  782u     ),
@@ -1125,6 +1128,7 @@
   static int vsplti_sim(int        x)  { return  opp_u_field(x,             15, 11); } // for vsplti* instructions
   static int vsldoi_shb(int        x)  { return  opp_u_field(x,             25, 22); } // for vsldoi instruction
   static int vcmp_rc(   int        x)  { return  opp_u_field(x,             21, 21); } // for vcmp* instructions
+  static int xxsplt_uim(int        x)  { return  opp_u_field(x,             15, 14); } // for xxsplt* instructions
 
   //static int xo1(     int        x)  { return  opp_u_field(x,             29, 21); }// is contained in our opcodes
   //static int xo2(     int        x)  { return  opp_u_field(x,             30, 21); }// is contained in our opcodes
@@ -1308,6 +1312,7 @@
   inline void li(   Register d, int si16);
   inline void lis(  Register d, int si16);
   inline void addir(Register d, int si16, Register a);
+  inline void subi( Register d, Register a, int si16);
 
   static bool is_addi(int x) {
      return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@@ -2154,6 +2159,11 @@
   inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
   inline void xxmrghw(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
   inline void xxmrglw(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void mtvsrd(   VectorSRegister d, Register a);
+  inline void mtvsrwz(  VectorSRegister d, Register a);
+  inline void xxspltw(  VectorSRegister d, VectorSRegister b, int ui2);
+  inline void xxlxor(   VectorSRegister d, VectorSRegister a, VectorSRegister b);
+  inline void xxleqv(   VectorSRegister d, VectorSRegister a, VectorSRegister b);
 
   // VSX Extended Mnemonics
   inline void xxspltd(  VectorSRegister d, VectorSRegister a, int x);
@@ -2174,7 +2184,8 @@
   inline void vsbox(       VectorRegister d, VectorRegister a);
 
   // SHA (introduced with Power 8)
-  // Not yet implemented.
+  inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
+  inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
 
   // Vector Binary Polynomial Multiplication (introduced with Power 8)
   inline void vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2285,6 +2296,11 @@
   inline void lvsl(  VectorRegister d, Register s2);
   inline void lvsr(  VectorRegister d, Register s2);
 
+  // Endianness-specific concatenation of 2 loaded vectors.
+  inline void load_perm(VectorRegister perm, Register addr);
+  inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
+  inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
+
   // RegisterOrConstant versions.
   // These emitters choose between the versions using two registers and
   // those with register and immediate, depending on the content of roc.
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -164,6 +164,7 @@
 inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
 inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
 inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
+inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }
 
 // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
 inline void Assembler::cmpi(  ConditionRegister f, int l, Register a, int si16)   { emit_int32( CMPI_OPCODE  | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@@ -760,9 +761,14 @@
 // Vector-Scalar (VSX) instructions.
 inline void Assembler::lxvd2x(  VectorSRegister d, Register s1)              { emit_int32( LXVD2X_OPCODE  | vsrt(d) | ra(0) | rb(s1)); }
 inline void Assembler::lxvd2x(  VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE  | vsrt(d) | ra0mem(s1) | rb(s2)); }
-inline void Assembler::stxvd2x( VectorSRegister d, Register s1)              { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
-inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
-inline void Assembler::mtvrd(   VectorRegister  d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
+inline void Assembler::stxvd2x( VectorSRegister d, Register s1)              { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
+inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::mtvsrd(  VectorSRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d)  | ra(a)); }
+inline void Assembler::mtvsrwz( VectorSRegister d, Register a)               { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
+inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2)           { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
+inline void Assembler::xxlxor(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xxleqv(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::mtvrd(    VectorRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mfvrd(   Register        a, VectorRegister d)         { emit_int32( MFVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mtvrwz(  VectorRegister  d, Register a)               { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
 inline void Assembler::mfvrwz(  Register        a, VectorRegister d)         { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@@ -925,7 +931,8 @@
 inline void Assembler::vsbox(       VectorRegister d, VectorRegister a)                   { emit_int32( VSBOX_OPCODE        | vrt(d) | vra(a)         ); }
 
 // SHA (introduced with Power 8)
-// Not yet implemented.
+inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
+inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
 
 // Vector Binary Polynomial Multiplication (introduced with Power 8)
 inline void Assembler::vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -1034,6 +1041,30 @@
 inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | rb(s2)); }
 inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | rb(s2)); }
 
+inline void Assembler::load_perm(VectorRegister perm, Register addr) {
+#if defined(VM_LITTLE_ENDIAN)
+  lvsr(perm, addr);
+#else
+  lvsl(perm, addr);
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
+#if defined(VM_LITTLE_ENDIAN)
+  vperm(first_dest, second, first_dest, perm);
+#else
+  vperm(first_dest, first_dest, second, perm);
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {
+#if defined(VM_LITTLE_ENDIAN)
+  vperm(dest, second, first, perm);
+#else
+  vperm(dest, first, second, perm);
+#endif
+}
+
 inline void Assembler::load_const(Register d, void* x, Register tmp) {
    load_const(d, (long)x, tmp);
 }
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -32,7 +32,7 @@
 // Sets the default values for platform dependent flags used by the runtime system.
 // (see globals.hpp)
 
-define_pd_global(bool, ShareVtableStubs,      false); // Improves performance markedly for mtrt and compress.
+define_pd_global(bool, ShareVtableStubs,      true);
 define_pd_global(bool, NeedsDeoptSuspend,     false); // Only register window machines need this.
 
 
@@ -103,6 +103,9 @@
           "CPU Version: x for PowerX. Currently recognizes Power5 to "      \
           "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
                                                                             \
+  product(bool, SuperwordUseVSX, false,                                     \
+          "Use Power8 VSX instructions for superword optimization.")        \
+                                                                            \
   /* Reoptimize code-sequences of calls at runtime, e.g. replace an */      \
   /* indirect call by a direct call.                                */      \
   product(bool, ReoptimizeCallSequences, true,                              \
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -129,7 +129,7 @@
   }
 }
 
-int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
+address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
   const int offset = MacroAssembler::offset_to_global_toc(addr);
 
   const address inst2_addr = a;
@@ -155,7 +155,7 @@
   assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
   set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
   set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
-  return (int)((intptr_t)addr - (intptr_t)inst1_addr);
+  return inst1_addr;
 }
 
 address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@@ -201,7 +201,7 @@
 //    clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
 //    ori rx = rx | const.lo
 // Clrldi will be passed by.
-int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
+address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
   assert(UseCompressedOops, "Should only patch compressed oops");
 
   const address inst2_addr = a;
@@ -227,7 +227,7 @@
 
   set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
   set_imm((int *)inst2_addr,        (xd)); // unsigned int
-  return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
+  return inst1_addr;
 }
 
 // Get compressed oop or klass constant.
@@ -3382,6 +3382,7 @@
   ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
   ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
   ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
+  resolve_oop_handle(mirror);
 }
 
 // Clear Array
@@ -5234,6 +5235,40 @@
   bind(L_post_third_loop_done);
 }   // multiply_128_x_128_loop
 
+void MacroAssembler::muladd(Register out, Register in,
+                            Register offset, Register len, Register k,
+                            Register tmp1, Register tmp2, Register carry) {
+
+  // Labels
+  Label LOOP, SKIP;
+
+  // Make sure length is positive.
+  cmpdi  (CCR0,    len,     0);
+
+  // Prepare variables
+  subi   (offset,  offset,  4);
+  li     (carry,   0);
+  ble    (CCR0,    SKIP);
+
+  mtctr  (len);
+  subi   (len,     len,     1    );
+  sldi   (len,     len,     2    );
+
+  // Main loop
+  bind(LOOP);
+  lwzx   (tmp1,    len,     in   );
+  lwzx   (tmp2,    offset,  out  );
+  mulld  (tmp1,    tmp1,    k    );
+  add    (tmp2,    carry,   tmp2 );
+  add    (tmp2,    tmp1,    tmp2 );
+  stwx   (tmp2,    offset,  out  );
+  srdi   (carry,   tmp2,    32   );
+  subi   (offset,  offset,  4    );
+  subi   (len,     len,     4    );
+  bdnz   (LOOP);
+  bind(SKIP);
+}
+
 void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                      Register y, Register ylen,
                                      Register z, Register zlen,
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -105,13 +105,15 @@
   };
 
   inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
-  static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
+  // Returns address of first instruction in sequence.
+  static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
   static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
 
 #ifdef _LP64
   // Patch narrow oop constant.
   inline static bool is_set_narrow_oop(address a, address bound);
-  static int patch_set_narrow_oop(address a, address bound, narrowOop data);
+  // Returns address of first instruction in sequence.
+  static address patch_set_narrow_oop(address a, address bound, narrowOop data);
   static narrowOop get_narrow_oop(address a, address bound);
 #endif
 
@@ -813,6 +815,8 @@
                                Register yz_idx, Register idx, Register carry,
                                Register product_high, Register product,
                                Register carry2, Register tmp);
+  void muladd(Register out, Register in, Register offset, Register len, Register k,
+              Register tmp1, Register tmp2, Register carry);
   void multiply_to_len(Register x, Register xlen,
                        Register y, Register ylen,
                        Register z, Register zlen,
@@ -862,6 +866,40 @@
   void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                   bool invertCRC);
 
+  // SHA-2 auxiliary functions and public interfaces
+ private:
+  void sha256_deque(const VectorRegister src,
+      const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
+  void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
+  void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
+      const int total_ws, const Register k, const VectorRegister* kpws,
+      const int total_kpws);
+  void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
+      const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
+      const Register j, const Register k);
+  void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
+      const VectorRegister c, const VectorRegister d, const VectorRegister e,
+      const VectorRegister f, const VectorRegister g, const VectorRegister h,
+      const Register hptr);
+
+  void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
+  void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3,
+      const VectorRegister w4, const VectorRegister w5,
+      const VectorRegister w6, const VectorRegister w7,
+      const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
+      const VectorRegister vRb, const Register k);
+
+ public:
+  void sha256(bool multi_block);
+  void sha512(bool multi_block);
+
+
   //
   // Debugging
   //
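
The new file below, macroAssembler_ppc_sha.cpp, implements the SHA-256 and SHA-512 compression functions with the Power 8 vshasigmaw/vshasigmad instructions, processing four (respectively two) message-schedule words per vector register. As a reading aid, here is a scalar Java sketch of one SHA-256 round and one schedule step per FIPS 180-4, which is what the vector code computes lane-wise; the assembly avoids the a..h register rotation by cycling which vector register plays each role (the h_cnt counter). This is a reference sketch, not the JDK implementation:

public class Sha256RoundSketch {
    static int ch(int e, int f, int g)  { return (e & f) ^ (~e & g); }
    static int maj(int a, int b, int c) { return (a & b) ^ (a & c) ^ (b & c); }
    static int bigSigma0(int x)   { return Integer.rotateRight(x, 2)  ^ Integer.rotateRight(x, 13) ^ Integer.rotateRight(x, 22); }
    static int bigSigma1(int x)   { return Integer.rotateRight(x, 6)  ^ Integer.rotateRight(x, 11) ^ Integer.rotateRight(x, 25); }
    static int smallSigma0(int x) { return Integer.rotateRight(x, 7)  ^ Integer.rotateRight(x, 18) ^ (x >>> 3); }
    static int smallSigma1(int x) { return Integer.rotateRight(x, 17) ^ Integer.rotateRight(x, 19) ^ (x >>> 10); }

    // Message schedule: w[j] = s1(w[j-2]) + w[j-7] + s0(w[j-15]) + w[j-16].
    // sha256_calc_4w evaluates this for four consecutive j at once.
    static int scheduleWord(int[] w, int j) {
        return smallSigma1(w[j - 2]) + w[j - 7] + smallSigma0(w[j - 15]) + w[j - 16];
    }

    // One round. hs = {a,b,c,d,e,f,g,h}; kpw is k[j] + w[j], which the
    // assembly keeps pre-added in the kpw0..kpw3 vectors.
    static void round(int[] hs, int kpw) {
        int a = hs[0], b = hs[1], c = hs[2], d = hs[3];
        int e = hs[4], f = hs[5], g = hs[6], h = hs[7];
        int t1 = h + bigSigma1(e) + ch(e, f, g) + kpw; // the vshasigmaw(bse, e, 1, 0xf) path
        int t2 = bigSigma0(a) + maj(a, b, c);          // the vshasigmaw(bsa, a, 1, 0) path
        hs[7] = g; hs[6] = f; hs[5] = e; hs[4] = d + t1;
        hs[3] = c; hs[2] = b; hs[1] = a; hs[0] = t1 + t2;
    }

    public static void main(String[] args) {
        // Standard SHA-256 initial state; round 0 of the one-block message "abc".
        int[] hs = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
                    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
        round(hs, 0x428a2f98 /* k[0] */ + 0x61626380 /* w[0]: "abc" plus padding byte */);
        System.out.printf("a after round 0 = %08x%n", hs[0]);
    }
}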
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,1136 @@
+// Copyright (c) 2017 Instituto de Pesquisas Eldorado. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+
+// Implemented according to "Descriptions of SHA-256, SHA-384, and SHA-512"
+// (http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf).
+
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+/**********************************************************************
+ * SHA 256
+ *********************************************************************/
+
+void MacroAssembler::sha256_deque(const VectorRegister src,
+                                  const VectorRegister dst1,
+                                  const VectorRegister dst2,
+                                  const VectorRegister dst3) {
+  vsldoi (dst1, src, src, 12);
+  vsldoi (dst2, src, src, 8);
+  vsldoi (dst3, src, src, 4);
+}
+
+void MacroAssembler::sha256_round(const VectorRegister* hs,
+                                  const int total_hs,
+                                  int& h_cnt,
+                                  const VectorRegister kpw) {
+  // convenience registers: cycle from 0-7 downwards
+  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+  // temporaries
+  VectorRegister ch  = VR0;
+  VectorRegister maj = VR1;
+  VectorRegister bsa = VR2;
+  VectorRegister bse = VR3;
+  VectorRegister vt0 = VR4;
+  VectorRegister vt1 = VR5;
+  VectorRegister vt2 = VR6;
+  VectorRegister vt3 = VR7;
+
+  vsel       (ch,  g,   f, e);
+  vxor       (maj, a,   b);
+  vshasigmaw (bse, e,   1, 0xf);
+  vadduwm    (vt2, ch,  kpw);
+  vadduwm    (vt1, h,   bse);
+  vsel       (maj, b,   c, maj);
+  vadduwm    (vt3, vt1, vt2);
+  vshasigmaw (bsa, a,   1, 0);
+  vadduwm    (vt0, bsa, maj);
+
+  vadduwm    (d,   d,   vt3);
+  vadduwm    (h,   vt3, vt0);
+
+  // advance vector pointer to the next iteration
+  h_cnt++;
+}
+
+void MacroAssembler::sha256_load_h_vec(const VectorRegister a,
+                                       const VectorRegister e,
+                                       const Register hptr) {
+  // temporaries
+  Register tmp = R8;
+  VectorRegister vt0 = VR0;
+  VectorRegister vRb = VR6;
+  // labels
+  Label sha256_aligned;
+
+  andi_  (tmp,  hptr, 0xf);
+  lvx    (a,    hptr);
+  addi   (tmp,  hptr, 16);
+  lvx    (e,    tmp);
+  beq    (CCR0, sha256_aligned);
+
+  // handle unaligned accesses
+  load_perm(vRb, hptr);
+  addi   (tmp, hptr, 32);
+  vec_perm(a,   e,    vRb);
+
+  lvx    (vt0,  tmp);
+  vec_perm(e,   vt0,  vRb);
+
+  // aligned accesses
+  bind(sha256_aligned);
+}
+
+void MacroAssembler::sha256_load_w_plus_k_vec(const Register buf_in,
+                                              const VectorRegister* ws,
+                                              const int total_ws,
+                                              const Register k,
+                                              const VectorRegister* kpws,
+                                              const int total_kpws) {
+  Label w_aligned, after_w_load;
+
+  Register tmp       = R8;
+  VectorRegister vt0 = VR0;
+  VectorRegister vt1 = VR1;
+  VectorRegister vRb = VR6;
+
+  andi_ (tmp, buf_in, 0xF);
+  beq   (CCR0, w_aligned); // address ends with 0x0, not 0x8
+
+  // deal with unaligned addresses
+  lvx    (ws[0], buf_in);
+  load_perm(vRb, buf_in);
+
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w_cur = ws[n];
+    VectorRegister w_prev = ws[n-1];
+
+    addi (tmp, buf_in, n * 16);
+    lvx  (w_cur, tmp);
+    vec_perm(w_prev, w_cur, vRb);
+  }
+  addi   (tmp, buf_in, total_ws * 16);
+  lvx    (vt0, tmp);
+  vec_perm(ws[total_ws-1], vt0, vRb);
+  b      (after_w_load);
+
+  bind(w_aligned);
+
+  // deal with aligned addresses
+  lvx(ws[0], buf_in);
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w, tmp);
+  }
+
+  bind(after_w_load);
+
+#if defined(VM_LITTLE_ENDIAN)
+  // Byte swapping within int values
+  li       (tmp, 8);
+  lvsl     (vt0, tmp);
+  vspltisb (vt1, 0xb);
+  vxor     (vt1, vt0, vt1);
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    vec_perm(w, w, vt1);
+  }
+#endif
+
+  // Loading k, which is always aligned to 16-bytes
+  lvx    (kpws[0], k);
+  for (int n = 1; n < total_kpws; n++) {
+    VectorRegister kpw = kpws[n];
+    addi (tmp, k, 16 * n);
+    lvx  (kpw, tmp);
+  }
+
+  // Add w to K
+  assert(total_ws == total_kpws, "Redesign the loop below");
+  for (int n = 0; n < total_kpws; n++) {
+    VectorRegister kpw = kpws[n];
+    VectorRegister w   = ws[n];
+
+    vadduwm  (kpw, kpw, w);
+  }
+}
+
+void MacroAssembler::sha256_calc_4w(const VectorRegister w0,
+                                    const VectorRegister w1,
+                                    const VectorRegister w2,
+                                    const VectorRegister w3,
+                                    const VectorRegister kpw0,
+                                    const VectorRegister kpw1,
+                                    const VectorRegister kpw2,
+                                    const VectorRegister kpw3,
+                                    const Register j,
+                                    const Register k) {
+  // Temporaries
+  const VectorRegister  vt0  = VR0;
+  const VectorRegister  vt1  = VR1;
+  const VectorSRegister vsrt1 = vt1->to_vsr();
+  const VectorRegister  vt2  = VR2;
+  const VectorRegister  vt3  = VR3;
+  const VectorSRegister vst3 = vt3->to_vsr();
+  const VectorRegister  vt4  = VR4;
+
+  // load to k[j]
+  lvx        (vt0, j,   k);
+
+  // advance j
+  addi       (j,   j,   16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+  // b = w[j-15], w[j-14], w[j-13], w[j-12]
+  vsldoi     (vt1, w1,  w0, 12);
+
+  // c = w[j-7], w[j-6], w[j-5], w[j-4]
+  vsldoi     (vt2, w3,  w2, 12);
+
+#else
+  // b = w[j-15], w[j-14], w[j-13], w[j-12]
+  vsldoi     (vt1, w0,  w1, 4);
+
+  // c = w[j-7], w[j-6], w[j-5], w[j-4]
+  vsldoi     (vt2, w2,  w3, 4);
+#endif
+
+  // d = w[j-2], w[j-1], w[j-4], w[j-3]
+  vsldoi     (vt3, w3,  w3, 8);
+
+  // b = s0(w[j-15]) , s0(w[j-14]) , s0(w[j-13]) , s0(w[j-12])
+  vshasigmaw (vt1, vt1, 0,  0);
+
+  // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j-4]) , s1(w[j-3])
+  vshasigmaw (vt3, vt3, 0,  0xf);
+
+  // c = s0(w[j-15]) + w[j-7],
+  //     s0(w[j-14]) + w[j-6],
+  //     s0(w[j-13]) + w[j-5],
+  //     s0(w[j-12]) + w[j-4]
+  vadduwm    (vt2, vt1, vt2);
+
+  // c = s0(w[j-15]) + w[j-7] + w[j-16],
+  //     s0(w[j-14]) + w[j-6] + w[j-15],
+  //     s0(w[j-13]) + w[j-5] + w[j-14],
+  //     s0(w[j-12]) + w[j-4] + w[j-13]
+  vadduwm    (vt2, vt2, w0);
+
+  // e = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j-4]), // UNDEFINED
+  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j-3])  // UNDEFINED
+  vadduwm    (vt4, vt2, vt3);
+
+  // At this point, e[0] and e[1] are the correct values to be stored at w[j]
+  // and w[j+1].
+  // e[2] and e[3] are not considered.
+  // b = s1(w[j]) , s1(w[j+1]) , UNDEFINED , UNDEFINED
+  vshasigmaw (vt1, vt4, 0,  0xf);
+
+  // v5 = s1(w[j-2]) , s1(w[j-1]) , s1(w[j]) , s1(w[j+1])
+#if defined(VM_LITTLE_ENDIAN)
+  xxmrgld    (vst3, vsrt1, vst3);
+#else
+  xxmrghd    (vst3, vst3, vsrt1);
+#endif
+
+  // c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j]),   // w[j+2]
+  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j+1])  // w[j+3]
+  vadduwm    (vt2, vt2, vt3);
+
+  // Updating w0 to w3 to hold the new previous 16 values from w.
+  vmr        (w0,  w1);
+  vmr        (w1,  w2);
+  vmr        (w2,  w3);
+  vmr        (w3,  vt2);
+
+  // store k + w to v9 (4 values at once)
+#if defined(VM_LITTLE_ENDIAN)
+  vadduwm    (kpw0, vt2, vt0);
+
+  vsldoi     (kpw1, kpw0, kpw0, 12);
+  vsldoi     (kpw2, kpw0, kpw0, 8);
+  vsldoi     (kpw3, kpw0, kpw0, 4);
+#else
+  vadduwm    (kpw3, vt2, vt0);
+
+  vsldoi     (kpw2, kpw3, kpw3, 12);
+  vsldoi     (kpw1, kpw3, kpw3, 8);
+  vsldoi     (kpw0, kpw3, kpw3, 4);
+#endif
+}
+
+void MacroAssembler::sha256_update_sha_state(const VectorRegister a,
+                                             const VectorRegister b_,
+                                             const VectorRegister c,
+                                             const VectorRegister d,
+                                             const VectorRegister e,
+                                             const VectorRegister f,
+                                             const VectorRegister g,
+                                             const VectorRegister h,
+                                             const Register hptr) {
+  // temporaries
+  VectorRegister vt0  = VR0;
+  VectorRegister vt1  = VR1;
+  VectorRegister vt2  = VR2;
+  VectorRegister vt3  = VR3;
+  VectorRegister vt4  = VR4;
+  VectorRegister vt5  = VR5;
+  VectorRegister vaux = VR6;
+  VectorRegister vRb  = VR6;
+  Register tmp        = R8;
+  Register of16       = R8;
+  Register of32       = R9;
+  Label state_load_aligned;
+
+  // Load hptr
+  andi_   (tmp, hptr, 0xf);
+  li      (of16, 16);
+  lvx     (vt0, hptr);
+  lvx     (vt5, of16, hptr);
+  beq     (CCR0, state_load_aligned);
+
+  // handle unaligned accesses
+  li      (of32, 32);
+  load_perm(vRb, hptr);
+
+  vec_perm(vt0, vt5,  vRb);        // vt0 = hptr[0]..hptr[3]
+
+  lvx     (vt1, hptr, of32);
+  vec_perm(vt5, vt1,  vRb);        // vt5 = hptr[4]..hptr[7]
+
+  // aligned accesses
+  bind(state_load_aligned);
+
+#if defined(VM_LITTLE_ENDIAN)
+  vmrglw  (vt1, b_, a);            // vt1 = {a, b, ?, ?}
+  vmrglw  (vt2, d, c);             // vt2 = {c, d, ?, ?}
+  vmrglw  (vt3, f, e);             // vt3 = {e, f, ?, ?}
+  vmrglw  (vt4, h, g);             // vt4 = {g, h, ?, ?}
+  xxmrgld (vt1->to_vsr(), vt2->to_vsr(), vt1->to_vsr()); // vt1 = {a, b, c, d}
+  xxmrgld (vt3->to_vsr(), vt4->to_vsr(), vt3->to_vsr()); // vt3 = {e, f, g, h}
+  vadduwm (a,   vt0, vt1);         // a = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+  vadduwm (e,   vt5, vt3);         // e = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+  // Save hptr back, works for any alignment
+  xxswapd (vt0->to_vsr(), a->to_vsr());
+  stxvd2x (vt0->to_vsr(), hptr);
+  xxswapd (vt5->to_vsr(), e->to_vsr());
+  stxvd2x (vt5->to_vsr(), of16, hptr);
+#else
+  vmrglw  (vt1, a, b_);            // vt1 = {a, b, ?, ?}
+  vmrglw  (vt2, c, d);             // vt2 = {c, d, ?, ?}
+  vmrglw  (vt3, e, f);             // vt3 = {e, f, ?, ?}
+  vmrglw  (vt4, g, h);             // vt4 = {g, h, ?, ?}
+  xxmrgld (vt1->to_vsr(), vt1->to_vsr(), vt2->to_vsr()); // vt1 = {a, b, c, d}
+  xxmrgld (vt3->to_vsr(), vt3->to_vsr(), vt4->to_vsr()); // vt3 = {e, f, g, h}
+  vadduwm (d,   vt0, vt1);         // d = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+  vadduwm (h,   vt5, vt3);         // h = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+  // Save hptr back, works for any alignment
+  stxvd2x (d->to_vsr(), hptr);
+  stxvd2x (h->to_vsr(), of16, hptr);
+#endif
+}
+
+static const uint32_t sha256_round_table[64] __attribute((aligned(16))) = {
+  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+static const uint32_t *sha256_round_consts = sha256_round_table;
+
+//   R3_ARG1   - byte[]  Input string with padding but in Big Endian
+//   R4_ARG2   - int[]   SHA.state (at first, the root of primes)
+//   R5_ARG3   - int     offset
+//   R6_ARG4   - int     limit
+//
+//   Internal Register usage:
+//   R7        - k
+//   R8        - tmp | j | of16
+//   R9        - of32
+//   VR0-VR8   - ch, maj, bsa, bse, vt0-vt3 | vt0-vt5, vaux/vRb
+//   VR9-VR16  - a-h
+//   VR17-VR20 - w0-w3
+//   VR21-VR23 - vRb | vaux0-vaux2
+//   VR24-VR27 - kpw0-kpw3
+void MacroAssembler::sha256(bool multi_block) {
+  static const ssize_t buf_size = 64;
+  static const uint8_t w_size = sizeof(sha256_round_table)/sizeof(uint32_t);
+#ifdef AIX
+  // malloc provides 16 byte alignment
+  if (((uintptr_t)sha256_round_consts & 0xF) != 0) {
+    uint32_t *new_round_consts = (uint32_t*)malloc(sizeof(sha256_round_table));
+    guarantee(new_round_consts, "oom");
+    memcpy(new_round_consts, sha256_round_consts, sizeof(sha256_round_table));
+    sha256_round_consts = (const uint32_t*)new_round_consts;
+  }
+#endif
+
+  Register buf_in = R3_ARG1;
+  Register state  = R4_ARG2;
+  Register ofs    = R5_ARG3;
+  Register limit  = R6_ARG4;
+
+  Label sha_loop, core_loop;
+
+  // Save non-volatile vector registers in the red zone
+  static const VectorRegister nv[] = {
+    VR20, VR21, VR22, VR23, VR24, VR25, VR26, VR27/*, VR28, VR29, VR30, VR31*/
+  };
+  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+  for (int c = 0; c < nv_size; c++) {
+    Register tmp = R8;
+    li  (tmp, (c - (nv_size)) * 16);
+    stvx(nv[c], tmp, R1);
+  }
+
+  // Load hash state to registers
+  VectorRegister a = VR9;
+  VectorRegister b = VR10;
+  VectorRegister c = VR11;
+  VectorRegister d = VR12;
+  VectorRegister e = VR13;
+  VectorRegister f = VR14;
+  VectorRegister g = VR15;
+  VectorRegister h = VR16;
+  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+  // counter for cycling through hs vector to avoid register moves between iterations
+  int h_cnt = 0;
+
+  // Load a-h registers from the memory pointed by state
+#if defined(VM_LITTLE_ENDIAN)
+  sha256_load_h_vec(a, e, state);
+#else
+  sha256_load_h_vec(d, h, state);
+#endif
+
+  // keep k loaded also during MultiBlock loops
+  Register k = R7;
+  assert(((uintptr_t)sha256_round_consts & 0xF) == 0, "k alignment");
+  load_const_optimized(k, (address)sha256_round_consts, R0);
+
+  // Avoiding redundant loads
+  if (multi_block) {
+    align(OptoLoopAlignment);
+  }
+  bind(sha_loop);
+#if defined(VM_LITTLE_ENDIAN)
+  sha256_deque(a, b, c, d);
+  sha256_deque(e, f, g, h);
+#else
+  sha256_deque(d, c, b, a);
+  sha256_deque(h, g, f, e);
+#endif
+
+  // Load 16 elements from w out of the loop.
+  // Order of the int values is endianness-specific.
+  VectorRegister w0 = VR17;
+  VectorRegister w1 = VR18;
+  VectorRegister w2 = VR19;
+  VectorRegister w3 = VR20;
+  static const VectorRegister ws[] = {w0, w1, w2, w3};
+  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+  VectorRegister kpw0 = VR24;
+  VectorRegister kpw1 = VR25;
+  VectorRegister kpw2 = VR26;
+  VectorRegister kpw3 = VR27;
+  static const VectorRegister kpws[] = {kpw0, kpw1, kpw2, kpw3};
+  static const int total_kpws = sizeof(kpws)/sizeof(VectorRegister);
+
+  sha256_load_w_plus_k_vec(buf_in, ws, total_ws, k, kpws, total_kpws);
+
+  // Cycle through the first 16 elements
+  assert(total_ws == total_kpws, "Redesign the loop below");
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister vaux0 = VR21;
+    VectorRegister vaux1 = VR22;
+    VectorRegister vaux2 = VR23;
+
+    sha256_deque(kpws[n], vaux0, vaux1, vaux2);
+
+#if defined(VM_LITTLE_ENDIAN)
+    sha256_round(hs, total_hs, h_cnt, kpws[n]);
+    sha256_round(hs, total_hs, h_cnt, vaux0);
+    sha256_round(hs, total_hs, h_cnt, vaux1);
+    sha256_round(hs, total_hs, h_cnt, vaux2);
+#else
+    sha256_round(hs, total_hs, h_cnt, vaux2);
+    sha256_round(hs, total_hs, h_cnt, vaux1);
+    sha256_round(hs, total_hs, h_cnt, vaux0);
+    sha256_round(hs, total_hs, h_cnt, kpws[n]);
+#endif
+  }
+
+  Register tmp = R8;
+  // loop the 16th to the 64th iteration by 8 steps
+  li   (tmp, (w_size - 16) / total_hs);
+  mtctr(tmp);
+
+  // j will be aligned to 4 for loading words.
+  // Whenever read, advance the pointer (e.g: when j is used in a function)
+  Register j = R8;
+  li   (j, 16*4);
+
+  align(OptoLoopAlignment);
+  bind(core_loop);
+
+  // due to VectorRegister rotate, always iterate in multiples of total_hs
+  for (int n = 0; n < total_hs/4; n++) {
+    sha256_calc_4w(w0, w1, w2, w3, kpw0, kpw1, kpw2, kpw3, j, k);
+    sha256_round(hs, total_hs, h_cnt, kpw0);
+    sha256_round(hs, total_hs, h_cnt, kpw1);
+    sha256_round(hs, total_hs, h_cnt, kpw2);
+    sha256_round(hs, total_hs, h_cnt, kpw3);
+  }
+
+  bdnz   (core_loop);
+
+  // Update hash state
+  sha256_update_sha_state(a, b, c, d, e, f, g, h, state);
+
+  if (multi_block) {
+    addi(buf_in, buf_in, buf_size);
+    addi(ofs, ofs, buf_size);
+    cmplw(CCR0, ofs, limit);
+    ble(CCR0, sha_loop);
+
+    // return ofs
+    mr(R3_RET, ofs);
+  }
+
+  // Restore non-volatile registers
+  for (int c = 0; c < nv_size; c++) {
+    Register tmp = R8;
+    li  (tmp, (c - (nv_size)) * 16);
+    lvx(nv[c], tmp, R1);
+  }
+}
+
+
+/**********************************************************************
+ * SHA 512
+ *********************************************************************/
+
+void MacroAssembler::sha512_load_w_vec(const Register buf_in,
+                                       const VectorRegister* ws,
+                                       const int total_ws) {
+  Register tmp       = R8;
+  VectorRegister vRb = VR8;
+  VectorRegister aux = VR9;
+  Label is_aligned, after_alignment;
+
+  andi_  (tmp, buf_in, 0xF);
+  beq    (CCR0, is_aligned); // address ends with 0x0, not 0x8
+
+  // deal with unaligned addresses
+  lvx    (ws[0], buf_in);
+  load_perm(vRb, buf_in);
+
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w_cur = ws[n];
+    VectorRegister w_prev = ws[n-1];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w_cur, tmp);
+    vec_perm(w_prev, w_cur, vRb);
+  }
+  addi   (tmp, buf_in, total_ws * 16);
+  lvx    (aux, tmp);
+  vec_perm(ws[total_ws-1], aux, vRb);
+  b      (after_alignment);
+
+  bind(is_aligned);
+  lvx  (ws[0], buf_in);
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w, tmp);
+  }
+
+  bind(after_alignment);
+}
+
+// Update hash state
+void MacroAssembler::sha512_update_sha_state(const Register state,
+                                             const VectorRegister* hs,
+                                             const int total_hs) {
+
+#if defined(VM_LITTLE_ENDIAN)
+  int start_idx = 0;
+#else
+  int start_idx = 1;
+#endif
+
+  // load initial hash from the memory pointed by state
+  VectorRegister ini_a = VR10;
+  VectorRegister ini_c = VR12;
+  VectorRegister ini_e = VR14;
+  VectorRegister ini_g = VR16;
+  static const VectorRegister inis[] = {ini_a, ini_c, ini_e, ini_g};
+  static const int total_inis = sizeof(inis)/sizeof(VectorRegister);
+
+  Label state_save_aligned, after_state_save_aligned;
+
+  Register addr      = R7;
+  Register tmp       = R8;
+  VectorRegister vRb = VR8;
+  VectorRegister aux = VR9;
+
+  andi_(tmp, state, 0xf);
+  beq(CCR0, state_save_aligned);
+  // deal with unaligned addresses
+
+  {
+    VectorRegister a = hs[0];
+    VectorRegister b_ = hs[1];
+    VectorRegister c = hs[2];
+    VectorRegister d = hs[3];
+    VectorRegister e = hs[4];
+    VectorRegister f = hs[5];
+    VectorRegister g = hs[6];
+    VectorRegister h = hs[7];
+    load_perm(vRb, state);
+    lvx    (ini_a, state);
+    addi   (addr, state, 16);
+
+    lvx    (ini_c, addr);
+    addi   (addr, state, 32);
+    vec_perm(ini_a, ini_c, vRb);
+
+    lvx    (ini_e, addr);
+    addi   (addr, state, 48);
+    vec_perm(ini_c, ini_e, vRb);
+
+    lvx    (ini_g, addr);
+    addi   (addr, state, 64);
+    vec_perm(ini_e, ini_g, vRb);
+
+    lvx    (aux, addr);
+    vec_perm(ini_g, aux, vRb);
+
+#if defined(VM_LITTLE_ENDIAN)
+    xxmrgld(a->to_vsr(), b_->to_vsr(), a->to_vsr());
+    xxmrgld(c->to_vsr(), d->to_vsr(), c->to_vsr());
+    xxmrgld(e->to_vsr(), f->to_vsr(), e->to_vsr());
+    xxmrgld(g->to_vsr(), h->to_vsr(), g->to_vsr());
+#else
+    xxmrgld(b_->to_vsr(), a->to_vsr(), b_->to_vsr());
+    xxmrgld(d->to_vsr(), c->to_vsr(), d->to_vsr());
+    xxmrgld(f->to_vsr(), e->to_vsr(), f->to_vsr());
+    xxmrgld(h->to_vsr(), g->to_vsr(), h->to_vsr());
+#endif
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+      VectorRegister ini_cur = inis[n/2];
+
+      vaddudm(h_cur, ini_cur, h_cur);
+    }
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+
+      mfvrd  (tmp, h_cur);
+#if defined(VM_LITTLE_ENDIAN)
+      std    (tmp, 8*n + 8, state);
+#else
+      std    (tmp, 8*n - 8, state);
+#endif
+      vsldoi (aux, h_cur, h_cur, 8);
+      mfvrd  (tmp, aux);
+      std    (tmp, 8*n + 0, state);
+    }
+
+    b      (after_state_save_aligned);
+  }
+
+  bind(state_save_aligned);
+  {
+    for (int n = 0; n < total_hs; n += 2) {
+#if defined(VM_LITTLE_ENDIAN)
+      VectorRegister h_cur = hs[n];
+      VectorRegister h_next = hs[n+1];
+#else
+      VectorRegister h_cur = hs[n+1];
+      VectorRegister h_next = hs[n];
+#endif
+      VectorRegister ini_cur = inis[n/2];
+
+      if (n/2 == 0) {
+        lvx(ini_cur, state);
+      } else {
+        addi(addr, state, (n/2) * 16);
+        lvx(ini_cur, addr);
+      }
+      xxmrgld(h_cur->to_vsr(), h_next->to_vsr(), h_cur->to_vsr());
+    }
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+      VectorRegister ini_cur = inis[n/2];
+
+      vaddudm(h_cur, ini_cur, h_cur);
+    }
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+
+      if (n/2 == 0) {
+        stvx(h_cur, state);
+      } else {
+        addi(addr, state, (n/2) * 16);
+        stvx(h_cur, addr);
+      }
+    }
+  }
+
+  bind(after_state_save_aligned);
+}
+
+// Use h_cnt to cycle through hs elements but also increment it at the end
+void MacroAssembler::sha512_round(const VectorRegister* hs,
+                                  const int total_hs, int& h_cnt,
+                                  const VectorRegister kpw) {
+
+  // convenience registers: cycle from 0-7 downwards
+  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+  // temporaries
+  const VectorRegister Ch   = VR20;
+  const VectorRegister Maj  = VR21;
+  const VectorRegister bsa  = VR22;
+  const VectorRegister bse  = VR23;
+  const VectorRegister tmp1 = VR24;
+  const VectorRegister tmp2 = VR25;
+
+  vsel      (Ch,   g,    f,   e);
+  vxor      (Maj,  a,    b);
+  vshasigmad(bse,  e,    1,   0xf);
+  vaddudm   (tmp2, Ch,   kpw);
+  vaddudm   (tmp1, h,    bse);
+  vsel      (Maj,  b,    c,   Maj);
+  vaddudm   (tmp1, tmp1, tmp2);
+  vshasigmad(bsa,  a,    1,   0);
+  vaddudm   (tmp2, bsa,  Maj);
+  vaddudm   (d,    d,    tmp1);
+  vaddudm   (h,    tmp1, tmp2);
+
+  // advance vector pointer to the next iteration
+  h_cnt++;
+}
+
+void MacroAssembler::sha512_calc_2w(const VectorRegister w0,
+                                    const VectorRegister w1,
+                                    const VectorRegister w2,
+                                    const VectorRegister w3,
+                                    const VectorRegister w4,
+                                    const VectorRegister w5,
+                                    const VectorRegister w6,
+                                    const VectorRegister w7,
+                                    const VectorRegister kpw0,
+                                    const VectorRegister kpw1,
+                                    const Register j,
+                                    const VectorRegister vRb,
+                                    const Register k) {
+  // Temporaries
+  const VectorRegister VR_a = VR20;
+  const VectorRegister VR_b = VR21;
+  const VectorRegister VR_c = VR22;
+  const VectorRegister VR_d = VR23;
+
+  // load to k[j]
+  lvx        (VR_a, j,    k);
+  // advance j
+  addi       (j,    j,    16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+  // v6 = w[j-15], w[j-14]
+  vperm      (VR_b, w1,   w0,  vRb);
+  // v12 = w[j-7], w[j-6]
+  vperm      (VR_c, w5,   w4,  vRb);
+#else
+  // v6 = w[j-15], w[j-14]
+  vperm      (VR_b, w0,   w1,  vRb);
+  // v12 = w[j-7], w[j-6]
+  vperm      (VR_c, w4,   w5,  vRb);
+#endif
+
+  // v6 = s0(w[j-15]) , s0(w[j-14])
+  vshasigmad (VR_b, VR_b,    0,   0);
+  // v5 = s1(w[j-2]) , s1(w[j-1])
+  vshasigmad (VR_d, w7,      0,   0xf);
+  // v6 = s0(w[j-15]) + w[j-7] , s0(w[j-14]) + w[j-6]
+  vaddudm    (VR_b, VR_b, VR_c);
+  // v8 = s1(w[j-2]) + w[j-16] , s1(w[j-1]) + w[j-15]
+  vaddudm    (VR_d, VR_d, w0);
+  // v9 = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //      s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  vaddudm    (VR_c, VR_d, VR_b);
+  // Updating w0 to w7 to hold the new previous 16 values from w.
+  vmr        (w0,   w1);
+  vmr        (w1,   w2);
+  vmr        (w2,   w3);
+  vmr        (w3,   w4);
+  vmr        (w4,   w5);
+  vmr        (w5,   w6);
+  vmr        (w6,   w7);
+  vmr        (w7,   VR_c);
+
+#if defined(VM_LITTLE_ENDIAN)
+  // store k + w to kpw0 (2 values at once)
+  vaddudm    (kpw0, VR_c, VR_a);
+  // kpw1 holds (k + w)[1]
+  vsldoi     (kpw1, kpw0, kpw0, 8);
+#else
+  // store k + w to kpw1 (2 values at once)
+  vaddudm    (kpw1, VR_c, VR_a);
+  // kpw0 holds (k + w)[1]
+  vsldoi     (kpw0, kpw1, kpw1, 8);
+#endif
+}
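+
+// For reference, each call to sha512_calc_2w extends the message schedule by
+// two words (FIPS 180-4, a scalar sketch only):
+//   W[j]   = sigma1(W[j-2]) + W[j-7] + sigma0(W[j-15]) + W[j-16]
+//   W[j+1] = sigma1(W[j-1]) + W[j-6] + sigma0(W[j-14]) + W[j-15]
+// and leaves K[j]+W[j] and K[j+1]+W[j+1] in kpw0/kpw1 for the next two rounds.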
+
+void MacroAssembler::sha512_load_h_vec(const Register state,
+                                       const VectorRegister* hs,
+                                       const int total_hs) {
+#if defined(VM_LITTLE_ENDIAN)
+  VectorRegister a   = hs[0];
+  VectorRegister g   = hs[6];
+  int start_idx = 0;
+#else
+  VectorRegister a   = hs[1];
+  VectorRegister g   = hs[7];
+  int start_idx = 1;
+#endif
+
+  Register addr      = R7;
+  VectorRegister vRb = VR8;
+  Register tmp       = R8;
+  Label state_aligned, after_state_aligned;
+
+  andi_(tmp, state, 0xf);
+  beq(CCR0, state_aligned);
+
+  // deal with unaligned addresses
+  VectorRegister aux = VR9;
+
+  lvx(hs[start_idx], state);
+  load_perm(vRb, state);
+
+  for (int n = start_idx + 2; n < total_hs; n += 2) {
+    VectorRegister h_cur   = hs[n];
+    VectorRegister h_prev2 = hs[n - 2];
+    addi(addr, state, (n/2) * 16);
+    lvx(h_cur, addr);
+    vec_perm(h_prev2, h_cur, vRb);
+  }
+  addi(addr, state, (total_hs/2) * 16);
+  lvx    (aux, addr);
+  vec_perm(hs[total_hs - 2 + start_idx], aux, vRb);
+  b      (after_state_aligned);
+
+  bind(state_aligned);
+
+  // deal with aligned addresses
+  lvx(hs[start_idx], state);
+
+  for (int n = start_idx + 2; n < total_hs; n += 2) {
+    VectorRegister h_cur = hs[n];
+    addi(addr, state, (n/2) * 16);
+    lvx(h_cur, addr);
+  }
+
+  bind(after_state_aligned);
+}
+
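+// SHA-512 round constants K[0..79] (FIPS 180-4): the first 64 bits of the
+// fractional parts of the cube roots of the first eighty prime numbers.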
+static const uint64_t sha512_round_table[80] __attribute((aligned(16))) = {
+  0x428a2f98d728ae22, 0x7137449123ef65cd,
+  0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
+  0x3956c25bf348b538, 0x59f111f1b605d019,
+  0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
+  0xd807aa98a3030242, 0x12835b0145706fbe,
+  0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+  0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
+  0x9bdc06a725c71235, 0xc19bf174cf692694,
+  0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+  0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
+  0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
+  0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+  0x983e5152ee66dfab, 0xa831c66d2db43210,
+  0xb00327c898fb213f, 0xbf597fc7beef0ee4,
+  0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+  0x06ca6351e003826f, 0x142929670a0e6e70,
+  0x27b70a8546d22ffc, 0x2e1b21385c26c926,
+  0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+  0x650a73548baf63de, 0x766a0abb3c77b2a8,
+  0x81c2c92e47edaee6, 0x92722c851482353b,
+  0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+  0xc24b8b70d0f89791, 0xc76c51a30654be30,
+  0xd192e819d6ef5218, 0xd69906245565a910,
+  0xf40e35855771202a, 0x106aa07032bbd1b8,
+  0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
+  0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
+  0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+  0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
+  0x748f82ee5defb2fc, 0x78a5636f43172f60,
+  0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+  0x90befffa23631e28, 0xa4506cebde82bde9,
+  0xbef9a3f7b2c67915, 0xc67178f2e372532b,
+  0xca273eceea26619c, 0xd186b8c721c0c207,
+  0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
+  0x06f067aa72176fba, 0x0a637dc5a2c898a6,
+  0x113f9804bef90dae, 0x1b710b35131c471b,
+  0x28db77f523047d84, 0x32caab7b40c72493,
+  0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
+  0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+  0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
+};
+static const uint64_t *sha512_round_consts = sha512_round_table;
+
+//   R3_ARG1   - byte[]  input bytes, already padded, in big-endian byte order
+//   R4_ARG2   - long[]  SHA.state (initially the fractional parts of the square roots of the first eight primes)
+//   R5_ARG3   - int     offset
+//   R6_ARG4   - int     limit
+//
+//   Internal Register usage:
+//   R7 R8 R9  - volatile temporaries
+//   VR0-VR7   - a-h
+//   VR8       - vRb
+//   VR9       - aux (highly volatile, use with care)
+//   VR10-VR17 - w0-w7 | ini_a-ini_h
+//   VR18      - vsp16 | kplusw0
+//   VR19      - vsp32 | kplusw1
+//   VR20-VR25 - sha512_calc_2w and sha512_round temporaries
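+//
+//   Rough flow per 128-byte block (sketch): load the eight 64-bit state words,
+//   load W[0..15] (byte-swapping them on little endian), run rounds 0..15,
+//   then loop to derive W[16..79] and run the remaining 64 rounds, and finally
+//   add the working variables back into the state.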
+void MacroAssembler::sha512(bool multi_block) {
+  static const ssize_t buf_size = 128;
+  static const uint8_t w_size = sizeof(sha512_round_table)/sizeof(uint64_t);
+#ifdef AIX
+  // On AIX the table may not end up 16-byte aligned; malloc provides 16-byte
+  // alignment, so copy the table if needed.
+  if (((uintptr_t)sha512_round_consts & 0xF) != 0) {
+    uint64_t *new_round_consts = (uint64_t*)malloc(sizeof(sha512_round_table));
+    guarantee(new_round_consts, "oom");
+    memcpy(new_round_consts, sha512_round_consts, sizeof(sha512_round_table));
+    sha512_round_consts = (const uint64_t*)new_round_consts;
+  }
+#endif
+
+  Register buf_in = R3_ARG1;
+  Register state  = R4_ARG2;
+  Register ofs    = R5_ARG3;
+  Register limit  = R6_ARG4;
+
+  Label sha_loop, core_loop;
+
+  // Save non-volatile vector registers in the red zone
+  static const VectorRegister nv[] = {
+    VR20, VR21, VR22, VR23, VR24, VR25/*, VR26, VR27, VR28, VR29, VR30, VR31*/
+  };
+  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+  for (int c = 0; c < nv_size; c++) {
+    Register idx = R7;
+    li  (idx, (c - (nv_size)) * 16);
+    stvx(nv[c], idx, R1);
+  }
+
+  // Load hash state to registers
+  VectorRegister a = VR0;
+  VectorRegister b = VR1;
+  VectorRegister c = VR2;
+  VectorRegister d = VR3;
+  VectorRegister e = VR4;
+  VectorRegister f = VR5;
+  VectorRegister g = VR6;
+  VectorRegister h = VR7;
+  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+  // counter for cycling through hs vector to avoid register moves between iterations
+  int h_cnt = 0;
+
+  // Load a-h registers from the memory pointed by state
+  sha512_load_h_vec(state, hs, total_hs);
+
+  Register k = R9;
+  assert(((uintptr_t)sha512_round_consts & 0xF) == 0, "k alignment");
+  load_const_optimized(k, (address)sha512_round_consts, R0);
+
+  if (multi_block) {
+    align(OptoLoopAlignment);
+  }
+  bind(sha_loop);
+
+  for (int n = 0; n < total_hs; n += 2) {
+#if defined(VM_LITTLE_ENDIAN)
+    VectorRegister h_cur = hs[n];
+    VectorRegister h_next = hs[n + 1];
+#else
+    VectorRegister h_cur = hs[n + 1];
+    VectorRegister h_next = hs[n];
+#endif
+    vsldoi (h_next, h_cur, h_cur, 8);
+  }
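+  // sha512_load_h_vec filled every other hs[] entry with a pair of 64-bit hash
+  // words; the vsldoi above fills the remaining entries with the same pair
+  // rotated by 8 bytes, so each working variable sits in the lane the rounds
+  // expect.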
+
+  // Load the first 16 w elements outside the core loop.
+  // The order of the long values is endianness-specific.
+  VectorRegister w0 = VR10;
+  VectorRegister w1 = VR11;
+  VectorRegister w2 = VR12;
+  VectorRegister w3 = VR13;
+  VectorRegister w4 = VR14;
+  VectorRegister w5 = VR15;
+  VectorRegister w6 = VR16;
+  VectorRegister w7 = VR17;
+  static const VectorRegister ws[] = {w0, w1, w2, w3, w4, w5, w6, w7};
+  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+  // Load 16 w into vectors and setup vsl for vperm
+  sha512_load_w_vec(buf_in, ws, total_ws);
+
+#if defined(VM_LITTLE_ENDIAN)
+  VectorRegister vsp16 = VR18;
+  VectorRegister vsp32 = VR19;
+  VectorRegister shiftarg = VR9;
+
+  vspltisw(vsp16,    8);
+  vspltisw(shiftarg, 1);
+  vsl     (vsp16,    vsp16, shiftarg);
+  vsl     (vsp32,    vsp16, shiftarg);
+
+  VectorRegister vsp8 = VR9;
+  vspltish(vsp8,     8);
+
+  // Convert input from Big Endian to Little Endian
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrlh  (w, w, vsp8);
+  }
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrlw  (w, w, vsp16);
+  }
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrld  (w, w, vsp32);
+  }
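+  // Taken together, the three rotate passes byte-swap every 64-bit lane:
+  // rotating halfwords by 8 swaps bytes within each halfword, rotating words
+  // by 16 swaps the halfwords, and rotating doublewords by 32 swaps the words.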
+#endif
+
+  Register Rb        = R10;
+  VectorRegister vRb = VR8;
+  li      (Rb, 8);
+  load_perm(vRb, Rb);
+
+  VectorRegister kplusw0 = VR18;
+  VectorRegister kplusw1 = VR19;
+
+  Register addr      = R7;
+
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+
+    if (n == 0) {
+      lvx  (kplusw0, k);
+    } else {
+      addi (addr, k, n * 16);
+      lvx  (kplusw0, addr);
+    }
+#if defined(VM_LITTLE_ENDIAN)
+    vaddudm(kplusw0, kplusw0, w);
+    vsldoi (kplusw1, kplusw0, kplusw0, 8);
+#else
+    vaddudm(kplusw1, kplusw0, w);
+    vsldoi (kplusw0, kplusw1, kplusw1, 8);
+#endif
+
+    sha512_round(hs, total_hs, h_cnt, kplusw0);
+    sha512_round(hs, total_hs, h_cnt, kplusw1);
+  }
+
+  Register tmp       = R8;
+  li    (tmp, (w_size-16)/total_hs);
+  mtctr (tmp);
+  // j is a byte offset into the round-constant table, kept 16-byte aligned for
+  // the vector loads; it is advanced whenever it is used for a load
+  // (see sha512_calc_2w).
+  Register j = tmp;
+  li     (j, 8*16);
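+  // The core loop below runs (w_size - 16) / total_hs = 8 times; each pass
+  // computes 8 new w values via sha512_calc_2w and performs 8 rounds,
+  // covering rounds 16..79.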
+
+  align(OptoLoopAlignment);
+  bind(core_loop);
+
+  // Due to the VectorRegister rotation in sha512_round, always iterate in multiples of total_hs rounds.
+  for (int n = 0; n < total_hs/2; n++) {
+    sha512_calc_2w(w0, w1, w2, w3, w4, w5, w6, w7, kplusw0, kplusw1, j, vRb, k);
+    sha512_round(hs, total_hs, h_cnt, kplusw0);
+    sha512_round(hs, total_hs, h_cnt, kplusw1);
+  }
+
+  bdnz   (core_loop);
+
+  sha512_update_sha_state(state, hs, total_hs);
+
+  if (multi_block) {
+    addi(buf_in, buf_in, buf_size);
+    addi(ofs, ofs, buf_size);
+    cmplw(CCR0, ofs, limit);
+    ble(CCR0, sha_loop);
+
+    // return ofs
+    mr(R3_RET, ofs);
+  }
+
+  // Restore non-volatile registers
+  for (int c = 0; c < nv_size; c++) {
+    Register idx = R7;
+    li  (idx, (c - (nv_size)) * 16);
+    lvx(nv[c], idx, R1);
+  }
+}
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -221,13 +221,13 @@
     // A calculation relative to the global TOC.
     if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
         (address)data) {
-      const int invalidated_range =
-        MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
+      const address inst2_addr = addr;
+      const address inst1_addr =
+        MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
                                                                    (address)data);
-      const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
-      // FIXME:
-      const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
-      ICache::ppc64_flush_icache_bytes(start, range);
+      assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
+      const int range = inst2_addr - inst1_addr + BytesPerInstWord;
+      ICache::ppc64_flush_icache_bytes(inst1_addr, range);
     }
     next_address = addr + 1 * BytesPerInstWord;
   } else if (MacroAssembler::is_load_const_at(addr)) {
@@ -288,15 +288,15 @@
 }
 
 void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
-  address   addr = addr_at(0);
+  address   inst2_addr = addr_at(0);
   CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
-  if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
-  const int invalidated_range =
-    MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
-  const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
-  // FIXME:
-  const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
-  ICache::ppc64_flush_icache_bytes(start, range);
+  if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
+    return;
+  const address inst1_addr =
+    MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
+  assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
+  const int range = inst2_addr - inst1_addr + BytesPerInstWord;
+  ICache::ppc64_flush_icache_bytes(inst1_addr, range);
 }
 
 // Do not use an assertion here. Let clients decide whether they only
--- a/src/hotspot/cpu/ppc/ppc.ad	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/ppc.ad	Tue Oct 10 16:29:04 2017 +0200
@@ -254,6 +254,73 @@
   reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
   reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v
 
+// ----------------------------
+// Vector-Scalar Registers
+// ----------------------------
+  reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
+  reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
+  reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
+  reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
+  reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
+  reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
+  reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
+  reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
+  reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
+  reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
+  reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
+  reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
+  reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
+  reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
+  reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
+  reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
+  reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
+  reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
+  reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
+  reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
+  reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
+  reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
+  reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
+  reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
+  reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
+  reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
+  reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
+  reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
+  reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
+  reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
+  reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
+  reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
+  reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
+  reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
+  reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
+  reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
+  reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
+  reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
+  reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
+  reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
+  reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
+  reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
+  reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
+  reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
+  reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
+  reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
+  reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
+  reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
+  reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
+  reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
+  reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
+  reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
+  reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
+  reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
+  reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
+  reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
+  reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
+  reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
+  reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
+  reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
+  reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
+  reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
+  reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
+  reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
 
 // ----------------------------
 // Specify priority of register selection within phases of register
@@ -385,6 +452,73 @@
 );
 
 alloc_class chunk3 (
+  VSR0,
+  VSR1,
+  VSR2,
+  VSR3,
+  VSR4,
+  VSR5,
+  VSR6,
+  VSR7,
+  VSR8,
+  VSR9,
+  VSR10,
+  VSR11,
+  VSR12,
+  VSR13,
+  VSR14,
+  VSR15,
+  VSR16,
+  VSR17,
+  VSR18,
+  VSR19,
+  VSR20,
+  VSR21,
+  VSR22,
+  VSR23,
+  VSR24,
+  VSR25,
+  VSR26,
+  VSR27,
+  VSR28,
+  VSR29,
+  VSR30,
+  VSR31,
+  VSR32,
+  VSR33,
+  VSR34,
+  VSR35,
+  VSR36,
+  VSR37,
+  VSR38,
+  VSR39,
+  VSR40,
+  VSR41,
+  VSR42,
+  VSR43,
+  VSR44,
+  VSR45,
+  VSR46,
+  VSR47,
+  VSR48,
+  VSR49,
+  VSR50,
+  VSR51,
+  VSR52,
+  VSR53,
+  VSR54,
+  VSR55,
+  VSR56,
+  VSR57,
+  VSR58,
+  VSR59,
+  VSR60,
+  VSR61,
+  VSR62,
+  VSR63
+);
+
+alloc_class chunk4 (
   // special registers
   // These registers are not allocated, but used for nodes generated by postalloc expand.
   SR_XER,
@@ -769,6 +903,45 @@
   F31, F31_H     // nv!
 );
 
+// ----------------------------
+// Vector-Scalar Register Class
+// ----------------------------
+
+reg_class vs_reg(
+  VSR32,
+  VSR33,
+  VSR34,
+  VSR35,
+  VSR36,
+  VSR37,
+  VSR38,
+  VSR39,
+  VSR40,
+  VSR41,
+  VSR42,
+  VSR43,
+  VSR44,
+  VSR45,
+  VSR46,
+  VSR47,
+  VSR48,
+  VSR49,
+  VSR50,
+  VSR51
+//  VSR52,     // nv!
+//  VSR53,     // nv!
+//  VSR54,     // nv!
+//  VSR55,     // nv!
+//  VSR56,     // nv!
+//  VSR57,     // nv!
+//  VSR58,     // nv!
+//  VSR59,     // nv!
+//  VSR60,     // nv!
+//  VSR61,     // nv!
+//  VSR62,     // nv!
+//  VSR63      // nv!
+);
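+// Note: VSR32..VSR63 overlay VR0..VR31; the registers commented out above map
+// to the non-volatile VR20..VR31 and are therefore excluded from allocation.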
+
  %}
 
 //----------DEFINITION BLOCK---------------------------------------------------
@@ -1502,7 +1675,7 @@
   if (reg < 64+64) return rc_float;
 
   // Between float regs & stack are the flags regs.
-  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+  assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
 
   return rc_stack;
 }
@@ -2048,14 +2221,24 @@
 
 // Vector width in bytes.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
-  assert(MaxVectorSize == 8, "");
-  return 8;
+  if (SuperwordUseVSX) {
+    assert(MaxVectorSize == 16, "");
+    return 16;
+  } else {
+    assert(MaxVectorSize == 8, "");
+    return 8;
+  }
 }
 
 // Vector ideal reg.
 const uint Matcher::vector_ideal_reg(int size) {
-  assert(MaxVectorSize == 8 && size == 8, "");
-  return Op_RegL;
+  if (SuperwordUseVSX) {
+    assert(MaxVectorSize == 16 && size == 16, "");
+    return Op_VecX;
+  } else {
+    assert(MaxVectorSize == 8 && size == 8, "");
+    return Op_RegL;
+  }
 }
 
 const uint Matcher::vector_shift_count_ideal_reg(int size) {
@@ -2075,7 +2258,7 @@
 
 // PPC doesn't support misaligned vectors store/load.
 const bool Matcher::misaligned_vectors_ok() {
-  return false;
+  return !AlignVector; // can be changed by flag
 }
 
 // PPC AES support not yet implemented
@@ -2217,10 +2400,31 @@
   F13_num
 };
 
+const MachRegisterNumbers vsarg_reg[64] = {
+  VSR0_num, VSR1_num, VSR2_num, VSR3_num,
+  VSR4_num, VSR5_num, VSR6_num, VSR7_num,
+  VSR8_num, VSR9_num, VSR10_num, VSR11_num,
+  VSR12_num, VSR13_num, VSR14_num, VSR15_num,
+  VSR16_num, VSR17_num, VSR18_num, VSR19_num,
+  VSR20_num, VSR21_num, VSR22_num, VSR23_num,
+  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
+  VSR28_num, VSR29_num, VSR30_num, VSR31_num,
+  VSR32_num, VSR33_num, VSR34_num, VSR35_num,
+  VSR36_num, VSR37_num, VSR38_num, VSR39_num,
+  VSR40_num, VSR41_num, VSR42_num, VSR43_num,
+  VSR44_num, VSR45_num, VSR46_num, VSR47_num,
+  VSR48_num, VSR49_num, VSR50_num, VSR51_num,
+  VSR52_num, VSR53_num, VSR54_num, VSR55_num,
+  VSR56_num, VSR57_num, VSR58_num, VSR59_num,
+  VSR60_num, VSR61_num, VSR62_num, VSR63_num
+};
+
 const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
 
 const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
 
+const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
+
 // Return whether or not this register is ever used as an argument. This
 // function is used on startup to build the trampoline stubs in generateOptoStub.
 // Registers not mentioned will be killed by the VM call in the trampoline, and
@@ -2552,6 +2756,115 @@
   return nodes;
 }
 
+typedef struct {
+  loadConL_hiNode *_large_hi;
+  loadConL_loNode *_large_lo;
+  mtvsrdNode      *_moved;
+  xxspltdNode     *_replicated;
+  loadConLNode    *_small;
+  MachNode        *_last;
+} loadConLReplicatedNodesTuple;
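+// The tuple above bundles the nodes created by loadConLReplicatedNodesTuple_create
+// below when postalloc-expanding a "load 64-bit constant and splat into a VSR"
+// sequence (either the small or the large constant-pool variant).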
+
+loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc, 
+                                                 vecXOper *dst, immI_0Oper *zero,
+                                                 OptoReg::Name reg_second, OptoReg::Name reg_first,
+                                                 OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
+  loadConLReplicatedNodesTuple nodes;
+
+  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
+  if (large_constant_pool) {
+    // Create new nodes.
+    loadConL_hiNode *m1 = new  loadConL_hiNode();
+    loadConL_loNode *m2 = new  loadConL_loNode();
+    mtvsrdNode *m3 = new  mtvsrdNode();
+    xxspltdNode *m4 = new  xxspltdNode();
+
+    // inputs for new nodes
+    m1->add_req(NULL, toc);
+    m2->add_req(NULL, m1);
+    m3->add_req(NULL, m2);
+    m4->add_req(NULL, m3);
+
+    // operands for new nodes
+    m1->_opnds[0] = new  iRegLdstOper(); // dst
+    m1->_opnds[1] = immSrc;              // src
+    m1->_opnds[2] = new  iRegPdstOper(); // toc
+
+    m2->_opnds[0] = new  iRegLdstOper(); // dst
+    m2->_opnds[1] = immSrc;              // src
+    m2->_opnds[2] = new  iRegLdstOper(); // base
+
+    m3->_opnds[0] = new  vecXOper();     // dst
+    m3->_opnds[1] = new  iRegLdstOper(); // src
+
+    m4->_opnds[0] = new  vecXOper();     // dst
+    m4->_opnds[1] = new  vecXOper();     // src
+    m4->_opnds[2] = zero;
+
+    // Initialize ins_attrib TOC fields.
+    m1->_const_toc_offset = -1;
+    m2->_const_toc_offset_hi_node = m1;
+
+    // Initialize ins_attrib instruction offset.
+    m1->_cbuf_insts_offset = -1;
+
+    // register allocation for new nodes
+    ra_->set_pair(m1->_idx, reg_second, reg_first);
+    ra_->set_pair(m2->_idx, reg_second, reg_first);
+    ra_->set1(m3->_idx, reg_second);
+    ra_->set2(m3->_idx, reg_vec_first);
+    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+    // Create result.
+    nodes._large_hi = m1;
+    nodes._large_lo = m2;
+    nodes._moved = m3;
+    nodes._replicated = m4;
+    nodes._small = NULL;
+    nodes._last = nodes._replicated;
+    assert(m2->bottom_type()->isa_long(), "must be long");
+  } else {
+    loadConLNode *m2 = new  loadConLNode();
+    mtvsrdNode *m3 = new  mtvsrdNode();
+    xxspltdNode *m4 = new  xxspltdNode();
+
+    // inputs for new nodes
+    m2->add_req(NULL, toc);
+
+    // operands for new nodes
+    m2->_opnds[0] = new  iRegLdstOper(); // dst
+    m2->_opnds[1] = immSrc;              // src
+    m2->_opnds[2] = new  iRegPdstOper(); // toc
+
+    m3->_opnds[0] = new  vecXOper();     // dst
+    m3->_opnds[1] = new  iRegLdstOper(); // src
+
+    m4->_opnds[0] = new  vecXOper();     // dst
+    m4->_opnds[1] = new  vecXOper();     // src
+    m4->_opnds[2] = zero;
+
+    // Initialize ins_attrib instruction offset.
+    m2->_cbuf_insts_offset = -1;
+    ra_->set1(m3->_idx, reg_second);
+    ra_->set2(m3->_idx, reg_vec_first);
+    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+    // register allocation for new nodes
+    ra_->set_pair(m2->_idx, reg_second, reg_first);
+
+    // Create result.
+    nodes._large_hi = NULL;
+    nodes._large_lo = NULL;
+    nodes._small = m2;
+    nodes._moved = m3;
+    nodes._replicated = m4;
+    nodes._last = nodes._replicated;
+    assert(m2->bottom_type()->isa_long(), "must be long");
+  }
+
+  return nodes;
+}
+
 %} // source
 
 encode %{
@@ -3212,6 +3525,27 @@
     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
   %}
 
+  enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
+    // Create new nodes.
+
+    // Make an operand with the bit pattern to load as float.
+    immLOper *op_repl = new  immLOper((jlong)replicate_immF(op_src->constantF()));
+    immI_0Oper *op_zero = new  immI_0Oper(0);
+
+    loadConLReplicatedNodesTuple loadConLNodes =
+      loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
+                                OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
+                                OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));
+
+    // Push new nodes.
+    if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
+    if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
+    if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
+    if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
+
+    assert(nodes->length() >= 1, "must have created at least 1 node");
+  %}
+
   // This enc_class is needed so that scheduler gets proper
   // input mapping for latency computation.
   enc_class enc_poll(immI dst, iRegLdst poll) %{
@@ -3840,6 +4174,14 @@
 //
 // Formats are generated automatically for constants and base registers.
 
+operand vecX() %{
+  constraint(ALLOC_IN_RC(vs_reg));
+  match(VecX);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 //----------Simple Operands----------------------------------------------------
 // Immediate Operands
 
@@ -5372,6 +5714,20 @@
   ins_pipe(pipe_class_memory);
 %}
 
+// Load Aligned Packed Byte
+instruct loadV16(vecX dst, indirect mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
+  size(4);
+  ins_encode %{
+    __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // Load Range, range = array length (=jint)
 instruct loadRange(iRegIdst dst, memory mem) %{
   match(Set dst (LoadRange mem));
@@ -6368,6 +6724,20 @@
   ins_pipe(pipe_class_memory);
 %}
 
+// Store Packed Byte long register to memory
+instruct storeV16(indirect mem, vecX src) %{
+  predicate(n->as_StoreVector()->memory_size() == 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STXVD2X     $mem, $src \t// store 16-byte Vector" %}
+  size(4);
+  ins_encode %{
+    __ stxvd2x($src$$VectorSRegister, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // Store Compressed Oop
 instruct storeN(memory dst, iRegN_P2N src) %{
   match(Set dst (StoreN dst src));
@@ -13239,6 +13609,26 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
+  effect(DEF temp1, USE src);
+  
+  size(4);
+  ins_encode %{
+    __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
+  effect(DEF dst, USE src, USE imm1);
+
+  size(4);
+  ins_encode %{
+    __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant); 
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 //---------- Replicate Vector Instructions ------------------------------------
 
 // Insrdi does replicate if src == dst.
@@ -13318,6 +13708,46 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 16);
+
+  expand %{
+    iRegLdst tmpL;
+    vecX tmpV;
+    immI8  imm1 %{ (int)  1 %}
+    moveReg(tmpL, src);
+    repl56(tmpL);
+    repl48(tmpL);
+    mtvsrwz(tmpV, tmpL);
+    xxspltw(dst, tmpV, imm1);
+  %}
+%}
+
+instruct repl16B_immI0(vecX dst, immI_0 zero) %{
+  match(Set dst (ReplicateB zero));
+  predicate(n->as_Vector()->length() == 16);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
+  size(4);
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 16);
+
+  format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
+  size(4);
+  ins_encode %{
+    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
   match(Set dst (ReplicateS src));
   predicate(n->as_Vector()->length() == 4);
@@ -13352,6 +13782,46 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 8);
+
+  expand %{
+    iRegLdst tmpL;
+    vecX tmpV;
+    immI8  zero %{ (int)  0 %} 
+    moveReg(tmpL, src);
+    repl48(tmpL);
+    repl32(tmpL);
+    mtvsrd(tmpV, tmpL);
+    xxpermdi(dst, tmpV, tmpV, zero);
+  %}
+%}
+
+instruct repl8S_immI0(vecX dst, immI_0 zero) %{
+  match(Set dst (ReplicateS zero));
+  predicate(n->as_Vector()->length() == 8);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
+  size(4);
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 8);
+
+  format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
+  size(4);
+  ins_encode %{
+    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
   match(Set dst (ReplicateI src));
   predicate(n->as_Vector()->length() == 2);
@@ -13386,6 +13856,46 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_cost(2 * DEFAULT_COST);
+
+  expand %{ 
+    iRegLdst tmpL;
+    vecX tmpV;
+    immI8  zero %{ (int)  0 %} 
+    moveReg(tmpL, src);
+    repl32(tmpL);
+    mtvsrd(tmpV, tmpL);
+    xxpermdi(dst, tmpV, tmpV, zero);
+  %}
+%}
+
+instruct repl4I_immI0(vecX dst, immI_0 zero) %{
+  match(Set dst (ReplicateI zero));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
+  size(4);
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
+  size(4);
+  ins_encode %{
+    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // Move float to int register via stack, replicate.
 instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
   match(Set dst (ReplicateF src));
@@ -13484,6 +13994,154 @@
 %}
 
 
+instruct repl4F_reg_Ex(vecX dst, regF src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
+  expand %{
+    stackSlotL tmpS;
+    iRegIdst tmpI;
+    iRegLdst tmpL;
+    vecX tmpV;
+    immI8  zero %{ (int)  0 %} 
+
+    moveF2I_reg_stack(tmpS, src);   // Move float to stack.
+    moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
+    moveReg(tmpL, tmpI);             // Move int to long reg.
+    repl32(tmpL);                    // Replicate bitpattern.
+    mtvsrd(tmpV, tmpL);
+    xxpermdi(dst, tmpV, tmpV, zero);
+  %}
+%}
+
+instruct repl4F_immF_Ex(vecX dst, immF src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 4);
+  ins_cost(10 * DEFAULT_COST);
+
+  postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
+%}
+
+instruct repl4F_immF0(vecX dst, immF_0 zero) %{
+  match(Set dst (ReplicateF zero));
+  predicate(n->as_Vector()->length() == 4);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_reg_Ex(vecX dst, regD src) %{
+  match(Set dst (ReplicateD src));
+  predicate(n->as_Vector()->length() == 2);
+  expand %{
+    stackSlotL tmpS;
+    iRegLdst tmpL;
+    iRegLdst tmp;
+    vecX tmpV;
+    immI8  zero %{ (int)  0 %} 
+    moveD2L_reg_stack(tmpS, src);
+    moveD2L_stack_reg(tmpL, tmpS);
+    mtvsrd(tmpV, tmpL);
+    xxpermdi(dst, tmpV, tmpV, zero);
+  %}
+%}
+
+instruct repl2D_immI0(vecX dst, immI_0 zero) %{
+  match(Set dst (ReplicateD zero));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
+  size(4);
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
+  match(Set dst (ReplicateD src));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
+  size(4);
+  ins_encode %{
+    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct mtvsrd(vecX dst, iRegLsrc src) %{
+  predicate(false);
+  effect(DEF dst, USE src);
+
+  format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register"%} 
+  size(4);
+  ins_encode %{
+    __ mtvsrd($dst$$VectorSRegister, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
+  effect(DEF dst, USE src, USE zero);
+
+  format %{ "XXSPLATD      $dst, $src, $zero \t// Permute 16-byte register"%}
+  size(4);
+  ins_encode %{
+    __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
+  %} 
+  ins_pipe(pipe_class_default);
+%}
+
+instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
+  effect(DEF dst, USE src1, USE src2, USE zero);
+
+  format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
+  size(4);
+  ins_encode %{
+    __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
+  %} 
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
+  match(Set dst (ReplicateL src));
+  predicate(n->as_Vector()->length() == 2);
+  expand %{
+    vecX tmpV;
+    immI8  zero %{ (int)  0 %} 
+    mtvsrd(tmpV, src); 
+    xxpermdi(dst, tmpV, tmpV, zero);
+  %}
+%}
+
+instruct repl2L_immI0(vecX dst, immI_0 zero) %{
+  match(Set dst (ReplicateL zero));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
+  size(4);
+  ins_encode %{
+    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
+  match(Set dst (ReplicateL src));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
+  size(4);
+  ins_encode %{
+    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // ============================================================================
 // Safepoint Instruction
 
--- a/src/hotspot/cpu/ppc/register_definitions_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/register_definitions_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -31,3 +31,5 @@
 REGISTER_DEFINITION(Register, noreg);
 
 REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+REGISTER_DEFINITION(VectorSRegister, vsnoreg);
--- a/src/hotspot/cpu/ppc/register_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -677,7 +677,7 @@
       * 2                                          // register halves
       + ConditionRegisterImpl::number_of_registers // condition code registers
       + SpecialRegisterImpl::number_of_registers   // special registers
-      + VectorRegisterImpl::number_of_registers    // VSX registers
+      + VectorSRegisterImpl::number_of_registers   // VSX registers
   };
 
   static const int max_gpr;
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -479,8 +479,8 @@
 
 // Is vector's size (in bytes) bigger than a size saved by default?
 bool SharedRuntime::is_wide_vector(int size) {
-  // Note, MaxVectorSize == 8 on PPC64.
-  assert(size <= 8, "%d bytes vectors are not supported", size);
+  // Note, MaxVectorSize == 8/16 on PPC64.
+  assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
   return size > 8;
 }
 
@@ -2234,9 +2234,6 @@
   __ release();
   // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
   __ stw(R0, thread_(thread_state));
-  if (UseMembar) {
-    __ fence();
-  }
 
 
   // The JNI call
@@ -2393,9 +2390,6 @@
   __ release();
   // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
   __ stw(R0, thread_(thread_state));
-  if (UseMembar) {
-    __ fence();
-  }
   __ bind(after_transition);
 
   // Reguard any pages if necessary.
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2667,7 +2667,7 @@
     return start;
   }
 
-  // Arguments for generated stub (little endian only):
+  // Arguments for generated stub:
   //   R3_ARG1   - source byte array address
   //   R4_ARG2   - destination byte array address
   //   R5_ARG3   - round key array
@@ -2686,7 +2686,6 @@
     Register keylen         = R8;
     Register temp           = R9;
     Register keypos         = R10;
-    Register hex            = R11;
     Register fifteen        = R12;
 
     VectorRegister vRet     = VR0;
@@ -2706,164 +2705,170 @@
     VectorRegister vTmp3    = VR11;
     VectorRegister vTmp4    = VR12;
 
-    VectorRegister vLow     = VR13;
-    VectorRegister vHigh    = VR14;
-
-    __ li              (hex, 16);
     __ li              (fifteen, 15);
-    __ vspltisb        (fSplt, 0x0f);
 
     // load unaligned from[0-15] to vsRet
     __ lvx             (vRet, from);
     __ lvx             (vTmp1, fifteen, from);
     __ lvsl            (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+    __ vspltisb        (fSplt, 0x0f);
     __ vxor            (fromPerm, fromPerm, fSplt);
+#endif
     __ vperm           (vRet, vRet, vTmp1, fromPerm);
 
     // load keylen (44 or 52 or 60)
     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
 
     // to load keys
-    __ lvsr            (keyPerm, key);
-    __ vxor            (vTmp2, vTmp2, vTmp2);
+    __ load_perm       (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
     __ vspltisb        (vTmp2, -16);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
-
-    // load the 1st round key to vKey1
-    __ li              (keypos, 0);
+#endif
+
+    // load the 1st round key to vTmp1
+    __ lvx             (vTmp1, key);
+    __ li              (keypos, 16);
     __ lvx             (vKey1, keypos, key);
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey1, vTmp1, vKey1, keyPerm);
+    __ vec_perm        (vTmp1, vKey1, keyPerm);
 
     // 1st round
-    __ vxor (vRet, vRet, vKey1);
+    __ vxor            (vRet, vRet, vTmp1);
 
     // load the 2nd round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 32);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, keyPerm);
 
     // load the 3rd round key to vKey2
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ li              (keypos, 48);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, keyPerm);
 
     // load the 4th round key to vKey3
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 64);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, keyPerm);
 
     // load the 5th round key to vKey4
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 80);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey4, vTmp1, keyPerm);
 
     // 2nd - 5th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
-    __ vcipher (vRet, vRet, vKey3);
-    __ vcipher (vRet, vRet, vKey4);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey3);
+    __ vcipher         (vRet, vRet, vKey4);
 
     // load the 6th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 96);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 7th round key to vKey2
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ li              (keypos, 112);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, keyPerm);
 
     // load the 8th round key to vKey3
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 128);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, keyPerm);
 
     // load the 9th round key to vKey4
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 144);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey4, vTmp1, keyPerm);
 
     // 6th - 9th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
-    __ vcipher (vRet, vRet, vKey3);
-    __ vcipher (vRet, vRet, vKey4);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey3);
+    __ vcipher         (vRet, vRet, vKey4);
 
     // load the 10th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 160);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 11th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 176);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     // if all round keys are loaded, skip next 4 rounds
     __ cmpwi           (CCR0, keylen, 44);
     __ beq             (CCR0, L_doLast);
 
     // 10th - 11th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
 
     // load the 12th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 192);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 13th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 208);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     // if all round keys are loaded, skip next 2 rounds
     __ cmpwi           (CCR0, keylen, 52);
     __ beq             (CCR0, L_doLast);
 
     // 12th - 13th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
 
     // load the 14th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 224);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 15th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 240);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     __ bind(L_doLast);
 
     // last two rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipherlast (vRet, vRet, vKey2);
-
-    __ neg             (temp, to);
-    __ lvsr            (toPerm, temp);
-    __ vspltisb        (vTmp2, -1);
-    __ vxor            (vTmp1, vTmp1, vTmp1);
-    __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
-    __ vxor            (toPerm, toPerm, fSplt);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipherlast     (vRet, vRet, vKey2);
+
+    // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+    __ lvsl            (toPerm, to);
+#else
+    __ lvsr            (toPerm, to);
+#endif
+    __ vspltisb        (vTmp3, -1);
+    __ vspltisb        (vTmp4, 0);
     __ lvx             (vTmp1, to);
-    __ vperm           (vRet, vRet, vRet, toPerm);
-    __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
-    __ lvx             (vTmp4, fifteen, to);
+    __ lvx             (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+    __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+    __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
+#else
+    __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+    __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
+    __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
+    __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
+    __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
     __ stvx            (vTmp1, to);
-    __ vsel            (vRet, vRet, vTmp4, vTmp2);
-    __ stvx            (vRet, fifteen, to);
 
     __ blr();
      return start;
   }
 
-  // Arguments for generated stub (little endian only):
+  // Arguments for generated stub:
   //   R3_ARG1   - source byte array address
   //   R4_ARG2   - destination byte array address
   //   R5_ARG3   - K (key) in little endian int array
@@ -2885,7 +2890,6 @@
     Register keylen         = R8;
     Register temp           = R9;
     Register keypos         = R10;
-    Register hex            = R11;
     Register fifteen        = R12;
 
     VectorRegister vRet     = VR0;
@@ -2906,30 +2910,30 @@
     VectorRegister vTmp3    = VR12;
     VectorRegister vTmp4    = VR13;
 
-    VectorRegister vLow     = VR14;
-    VectorRegister vHigh    = VR15;
-
-    __ li              (hex, 16);
     __ li              (fifteen, 15);
-    __ vspltisb        (fSplt, 0x0f);
 
     // load unaligned from[0-15] to vsRet
     __ lvx             (vRet, from);
     __ lvx             (vTmp1, fifteen, from);
     __ lvsl            (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+    __ vspltisb        (fSplt, 0x0f);
     __ vxor            (fromPerm, fromPerm, fSplt);
+#endif
     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
 
     // load keylen (44 or 52 or 60)
     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
 
     // to load keys
-    __ lvsr            (keyPerm, key);
+    __ load_perm       (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
     __ vxor            (vTmp2, vTmp2, vTmp2);
     __ vspltisb        (vTmp2, -16);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
+#endif
 
     __ cmpwi           (CCR0, keylen, 44);
     __ beq             (CCR0, L_do44);
@@ -2937,32 +2941,32 @@
     __ cmpwi           (CCR0, keylen, 52);
     __ beq             (CCR0, L_do52);
 
-    // load the 15th round key to vKey11
+    // load the 15th round key to vKey1
     __ li              (keypos, 240);
-    __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
-
-    // load the 14th round key to vKey10
-    __ addi            (keypos, keypos, -16);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 224);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
+
+    // load the 14th round key to vKey2
+    __ li              (keypos, 208);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 13th round key to vKey3
+    __ li              (keypos, 192);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
+
+    // load the 12th round key to vKey4
+    __ li              (keypos, 176);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
+
+    // load the 11th round key to vKey5
+    __ li              (keypos, 160);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
-
-    // load the 13th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
-
-    // load the 12th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
-
-    // load the 11th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // 1st - 5th rounds
     __ vxor            (vRet, vRet, vKey1);
@@ -2975,22 +2979,22 @@
 
     __ bind            (L_do52);
 
-    // load the 13th round key to vKey11
+    // load the 13th round key to vKey1
     __ li              (keypos, 208);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 192);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
+
+    // load the 12th round key to vKey2
+    __ li              (keypos, 176);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 11th round key to vKey3
+    __ li              (keypos, 160);
     __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
-
-    // load the 12th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
-
-    // load the 11th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey3, vTmp1, vKey3, keyPerm);
 
     // 1st - 3rd rounds
     __ vxor            (vRet, vRet, vKey1);
@@ -3001,42 +3005,42 @@
 
     __ bind            (L_do44);
 
-    // load the 11th round key to vKey11
+    // load the 11th round key to vKey1
     __ li              (keypos, 176);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 160);
     __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey1, vTmp1, vKey1, keyPerm);
 
     // 1st round
     __ vxor            (vRet, vRet, vKey1);
 
     __ bind            (L_doLast);
 
-    // load the 10th round key to vKey10
-    __ addi            (keypos, keypos, -16);
+    // load the 10th round key to vKey1
+    __ li              (keypos, 144);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
+
+    // load the 9th round key to vKey2
+    __ li              (keypos, 128);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 8th round key to vKey3
+    __ li              (keypos, 112);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
+
+    // load the 7th round key to vKey4
+    __ li              (keypos, 96);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
+
+    // load the 6th round key to vKey5
+    __ li              (keypos, 80);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
-
-    // load the 9th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
-
-    // load the 8th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
-
-    // load the 7th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
-
-    // load the 6th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey5, vTmp2, vTmp1, keyPerm);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // last 10th - 6th rounds
     __ vncipher        (vRet, vRet, vKey1);
@@ -3045,30 +3049,29 @@
     __ vncipher        (vRet, vRet, vKey4);
     __ vncipher        (vRet, vRet, vKey5);
 
-    // load the 5th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
-
-    // load the 4th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
-
-    // load the 3rd round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
-
-    // load the 2nd round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
-
-    // load the 1st round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
+    // load the 5th round key to vKey1
+    __ li              (keypos, 64);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
+
+    // load the 4th round key to vKey2
+    __ li              (keypos, 48);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 3rd round key to vKey3
+    __ li              (keypos, 32);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
+
+    // load the 2nd round key to vKey4
+    __ li              (keypos, 16);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
+
+    // load the 1st round key to vKey5
+    __ lvx             (vTmp1, key);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // last 5th - 1th rounds
     __ vncipher        (vRet, vRet, vKey1);
@@ -3077,24 +3080,54 @@
     __ vncipher        (vRet, vRet, vKey4);
     __ vncipherlast    (vRet, vRet, vKey5);
 
-    __ neg             (temp, to);
-    __ lvsr            (toPerm, temp);
-    __ vspltisb        (vTmp2, -1);
-    __ vxor            (vTmp1, vTmp1, vTmp1);
-    __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
-    __ vxor            (toPerm, toPerm, fSplt);
+    // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+    __ lvsl            (toPerm, to);
+#else
+    __ lvsr            (toPerm, to);
+#endif
+    __ vspltisb        (vTmp3, -1);
+    __ vspltisb        (vTmp4, 0);
     __ lvx             (vTmp1, to);
-    __ vperm           (vRet, vRet, vRet, toPerm);
-    __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
-    __ lvx             (vTmp4, fifteen, to);
+    __ lvx             (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+    __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+    __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
+#else
+    __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+    __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
+    __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
+    __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
+    __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
     __ stvx            (vTmp1, to);
-    __ vsel            (vRet, vRet, vTmp4, vTmp2);
-    __ stvx            (vRet, fifteen, to);
 
     __ blr();
      return start;
   }
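For reference, the unaligned result store above works by merging the rotated 16-byte result into the one or two aligned quadwords that cover [to, to+16) and by writing the possibly-aliasing high block first. A minimal byte-level C++ sketch of that merge, illustrative only and not the vector code:

#include <cstdint>
#include <cstring>
#include <cstddef>

// Merge 16 result bytes into memory at an arbitrary address 'to' by rewriting
// the aligned 16-byte block(s) that cover [to, to+16).
static void store16_unaligned_ref(uint8_t* to, const uint8_t result[16]) {
  uint8_t* lo  = (uint8_t*)((uintptr_t)to & ~(uintptr_t)15);         // aligned block containing 'to'
  uint8_t* hi  = (uint8_t*)(((uintptr_t)to + 15) & ~(uintptr_t)15);  // aligned block containing 'to + 15'
  size_t   off = (size_t)(to - lo);                                  // misalignment, 0..15
  uint8_t block_lo[16], block_hi[16];
  std::memcpy(block_lo, lo, 16);
  std::memcpy(block_hi, hi, 16);
  for (size_t i = 0; i < 16; i++) {             // the vsel step: only bytes inside [to, to+16) change
    if (off + i < 16) block_lo[off + i]      = result[i];
    else              block_hi[off + i - 16] = result[i];
  }
  std::memcpy(hi, block_hi, 16);                // store the high block first; it may alias the low one
  std::memcpy(lo, block_lo, 16);
}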
 
+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha256 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha512 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
   void generate_arraycopy_stubs() {
     // Note: the disjoint stubs must be generated first, some of
     // the conjoint stubs use them.
@@ -3306,6 +3339,267 @@
       BLOCK_COMMENT("} Stub body");
   }
 
+  /**
+  *  Arguments:
+  *
+  *  Input:
+  *   R3_ARG1    - out address
+  *   R4_ARG2    - in address
+  *   R5_ARG3    - offset
+  *   R6_ARG4    - len
+  *   R7_ARG5    - k
+  *  Output:
+  *   R3_RET     - carry
+  */
+  address generate_mulAdd() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+    address start = __ function_entry();
+
+    // C2 does not sign-extend signed (int) parameters to full 64-bit registers:
+    __ rldic (R5_ARG3, R5_ARG3, 2, 32);  // always positive
+    __ clrldi(R6_ARG4, R6_ARG4, 32);     // clear the upper 32 bits
+    __ clrldi(R7_ARG5, R7_ARG5, 32);     // clear the upper 32 bits
+
+    __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
+
+    // Move the output carry to the return register.
+    __ mr    (R3_RET,  R10);
+
+    __ blr();
+
+    return start;
+  }
+
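For context, mulAdd is a multiply-accumulate over 32-bit limbs with a propagated carry that ends up in R3_RET. A plain scalar sketch of the per-limb step, ignoring the offset handling and the int[] word order used by java.math.BigInteger (both assumptions of this sketch, not the stub's layout):

#include <cstdint>
#include <cstddef>

// out[i] += in[i] * k over 32-bit limbs (least-significant first here); returns the final carry.
static uint32_t mul_add_ref(uint32_t* out, const uint32_t* in, size_t len, uint32_t k) {
  uint64_t carry = 0;
  for (size_t i = 0; i < len; i++) {
    uint64_t t = (uint64_t)in[i] * k + out[i] + carry;
    out[i] = (uint32_t)t;   // low half stays in place
    carry  = t >> 32;       // high half propagates to the next limb
  }
  return (uint32_t)carry;   // corresponds to the value moved into R3_RET above
}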
+  /**
+  *  Arguments:
+  *
+  *  Input:
+  *   R3_ARG1    - in address
+  *   R4_ARG2    - in length
+  *   R5_ARG3    - out address
+  *   R6_ARG4    - out length
+  */
+  address generate_squareToLen() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "squareToLen");
+
+    address start = __ function_entry();
+
+    // args - clear the upper word (unsigned) since the int arguments are used as 64-bit values
+    const Register in        = R3_ARG1;
+    const Register in_len    = R4_ARG2;
+    __ clrldi(in_len, in_len, 32);
+    const Register out       = R5_ARG3;
+    const Register out_len   = R6_ARG4;
+    __ clrldi(out_len, out_len, 32);
+
+    // output
+    const Register ret       = R3_RET;
+
+    // temporaries
+    const Register lplw_s    = R7;
+    const Register in_aux    = R8;
+    const Register out_aux   = R9;
+    const Register piece     = R10;
+    const Register product   = R14;
+    const Register lplw      = R15;
+    const Register i_minus1  = R16;
+    const Register carry     = R17;
+    const Register offset    = R18;
+    const Register off_aux   = R19;
+    const Register t         = R20;
+    const Register mlen      = R21;
+    const Register len       = R22;
+    const Register a         = R23;
+    const Register b         = R24;
+    const Register i         = R25;
+    const Register c         = R26;
+    const Register cs        = R27;
+
+    // Labels
+    Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
+    Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
+
+    // Save non-volatile regs (frameless).
+    int current_offs = -8;
+    __ std(R28, current_offs, R1_SP); current_offs -= 8;
+    __ std(R27, current_offs, R1_SP); current_offs -= 8;
+    __ std(R26, current_offs, R1_SP); current_offs -= 8;
+    __ std(R25, current_offs, R1_SP); current_offs -= 8;
+    __ std(R24, current_offs, R1_SP); current_offs -= 8;
+    __ std(R23, current_offs, R1_SP); current_offs -= 8;
+    __ std(R22, current_offs, R1_SP); current_offs -= 8;
+    __ std(R21, current_offs, R1_SP); current_offs -= 8;
+    __ std(R20, current_offs, R1_SP); current_offs -= 8;
+    __ std(R19, current_offs, R1_SP); current_offs -= 8;
+    __ std(R18, current_offs, R1_SP); current_offs -= 8;
+    __ std(R17, current_offs, R1_SP); current_offs -= 8;
+    __ std(R16, current_offs, R1_SP); current_offs -= 8;
+    __ std(R15, current_offs, R1_SP); current_offs -= 8;
+    __ std(R14, current_offs, R1_SP);
+
+    // Store the squares, right shifted one bit (i.e., divided by 2)
+    __ subi   (out_aux,   out,       8);
+    __ subi   (in_aux,    in,        4);
+    __ cmpwi  (CCR0,      in_len,    0);
+    // Initialize lplw outside of the loop
+    __ xorr   (lplw,      lplw,      lplw);
+    __ ble    (CCR0,      SKIP_LOOP_SQUARE);    // in_len <= 0
+    __ mtctr  (in_len);
+
+    __ bind(LOOP_SQUARE);
+    __ lwzu   (piece,     4,         in_aux);
+    __ mulld  (product,   piece,     piece);
+    // shift left 63 bits and only keep the MSB
+    __ rldic  (lplw_s,    lplw,      63, 0);
+    __ mr     (lplw,      product);
+    // shift right 1 bit without sign extension
+    __ srdi   (product,   product,   1);
+    // join them to the same register and store it
+    __ orr    (product,   lplw_s,    product);
+#ifdef VM_LITTLE_ENDIAN
+    // Swap low and high words for little endian
+    __ rldicl (product,   product,   32, 0);
+#endif
+    __ stdu   (product,   8,         out_aux);
+    __ bdnz   (LOOP_SQUARE);
+
+    __ bind(SKIP_LOOP_SQUARE);
+
+    // Add in off-diagonal sums
+    __ cmpwi  (CCR0,      in_len,    0);
+    __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
+    // Avoid CTR usage here so it stays available for mulAdd
+    __ subi   (i_minus1,  in_len,    1);
+    __ li     (offset,    4);
+
+    __ bind(LOOP_DIAGONAL_SUM);
+
+    __ sldi   (off_aux,   out_len,   2);
+    __ sub    (off_aux,   off_aux,   offset);
+
+    __ mr     (len,       i_minus1);
+    __ sldi   (mlen,      i_minus1,  2);
+    __ lwzx   (t,         in,        mlen);
+
+    __ muladd (out, in, off_aux, len, t, a, b, carry);
+
+    // begin<addOne>
+    // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
+    __ addi   (mlen,      mlen,      4);
+    __ sldi   (a,         out_len,   2);
+    __ subi   (a,         a,         4);
+    __ sub    (a,         a,         mlen);
+    __ subi   (off_aux,   offset,    4);
+    __ sub    (off_aux,   a,         off_aux);
+
+    __ lwzx   (b,         off_aux,   out);
+    __ add    (b,         b,         carry);
+    __ stwx   (b,         off_aux,   out);
+
+    // if (((uint64_t)s >> 32) != 0) {
+    __ srdi_  (a,         b,         32);
+    __ beq    (CCR0,      SKIP_ADDONE);
+
+    // while (--mlen >= 0) {
+    __ bind(LOOP_ADDONE);
+    __ subi   (mlen,      mlen,      4);
+    __ cmpwi  (CCR0,      mlen,      0);
+    __ beq    (CCR0,      SKIP_ADDONE);
+
+    // if (--offset_aux < 0) { // Carry out of number
+    __ subi   (off_aux,   off_aux,   4);
+    __ cmpwi  (CCR0,      off_aux,   0);
+    __ blt    (CCR0,      SKIP_ADDONE);
+
+    // } else {
+    __ lwzx   (b,         off_aux,   out);
+    __ addi   (b,         b,         1);
+    __ stwx   (b,         off_aux,   out);
+    __ cmpwi  (CCR0,      b,         0);
+    __ bne    (CCR0,      SKIP_ADDONE);
+    __ b      (LOOP_ADDONE);
+
+    __ bind(SKIP_ADDONE);
+    // } } } end<addOne>
+
+    __ addi   (offset,    offset,    8);
+    __ subi   (i_minus1,  i_minus1,  1);
+    __ cmpwi  (CCR0,      i_minus1,  0);
+    __ bge    (CCR0,      LOOP_DIAGONAL_SUM);
+
+    __ bind(SKIP_DIAGONAL_SUM);
+
+    // Shift back up and set low bit
+    // Shifts 1 bit left up to len positions. Assumes no leading zeros
+    // begin<primitiveLeftShift>
+    __ cmpwi  (CCR0,      out_len,   0);
+    __ ble    (CCR0,      SKIP_LSHIFT);
+    __ li     (i,         0);
+    __ lwz    (c,         0,         out);
+    __ subi   (b,         out_len,   1);
+    __ mtctr  (b);
+
+    __ bind(LOOP_LSHIFT);
+    __ mr     (b,         c);
+    __ addi   (cs,        i,         4);
+    __ lwzx   (c,         out,       cs);
+
+    __ sldi   (b,         b,         1);
+    __ srwi   (cs,        c,         31);
+    __ orr    (b,         b,         cs);
+    __ stwx   (b,         i,         out);
+
+    __ addi   (i,         i,         4);
+    __ bdnz   (LOOP_LSHIFT);
+
+    __ sldi   (c,         out_len,   2);
+    __ subi   (c,         c,         4);
+    __ lwzx   (b,         out,       c);
+    __ sldi   (b,         b,         1);
+    __ stwx   (b,         out,       c);
+
+    __ bind(SKIP_LSHIFT);
+    // end<primitiveLeftShift>
+
+    // Set low bit
+    __ sldi   (i,         in_len,    2);
+    __ subi   (i,         i,         4);
+    __ lwzx   (i,         in,        i);
+    __ sldi   (c,         out_len,   2);
+    __ subi   (c,         c,         4);
+    __ lwzx   (b,         out,       c);
+
+    __ andi   (i,         i,         1);
+    __ orr    (i,         b,         i);
+
+    __ stwx   (i,         out,       c);
+
+    // Restore non-volatile regs.
+    current_offs = -8;
+    __ ld(R28, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R27, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R26, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R25, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R24, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R23, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R22, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R21, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R20, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R19, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R18, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R17, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R16, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R15, current_offs, R1_SP); current_offs -= 8;
+    __ ld(R14, current_offs, R1_SP);
+
+    __ mr(ret, out);
+    __ blr();
+
+    return start;
+  }
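As a cross-check for the three phases above (squares stored pre-shifted right by one bit, off-diagonal sums accumulated via muladd/addOne, then a left shift with the low bit restored), here is a straightforward schoolbook reference of what squareToLen computes. Limbs are least-significant-first here, which is an assumption of the sketch and not the int[] layout BigInteger uses:

#include <cstdint>
#include <cstddef>

// out (2*len limbs) = square of in (len limbs); 32-bit limbs, least-significant first.
static void square_to_len_ref(const uint32_t* in, size_t len, uint32_t* out) {
  uint64_t* acc = new uint64_t[2 * len]();     // wide, zero-initialized accumulator per output limb
  for (size_t i = 0; i < len; i++) {
    for (size_t j = 0; j < len; j++) {         // diagonal products once, off-diagonal products twice
      uint64_t p = (uint64_t)in[i] * in[j];
      acc[i + j]     += (uint32_t)p;
      acc[i + j + 1] += p >> 32;
    }
  }
  uint64_t carry = 0;                          // normalize back to 32-bit limbs
  for (size_t k = 0; k < 2 * len; k++) {
    uint64_t t = acc[k] + carry;
    out[k] = (uint32_t)t;
    carry  = t >> 32;
  }
  delete[] acc;
}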
 
   /**
    * Arguments:
@@ -3500,6 +3794,12 @@
     }
 #endif
 
+    if (UseSquareToLenIntrinsic) {
+      StubRoutines::_squareToLen = generate_squareToLen();
+    }
+    if (UseMulAddIntrinsic) {
+      StubRoutines::_mulAdd = generate_mulAdd();
+    }
     if (UseMontgomeryMultiplyIntrinsic) {
       StubRoutines::_montgomeryMultiply
         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
@@ -3514,6 +3814,14 @@
       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
     }
 
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
+    }
   }
 
  public:
--- a/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -34,7 +34,7 @@
 
 enum platform_dependent_constants {
   code_size1 = 20000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 20000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 24000           // simply increase if too small (assembler will crash if too small)
 };
 
 // CRC32 Intrinsics.
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1470,10 +1470,6 @@
   // TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
   __ stw(R0, thread_(thread_state));
 
-  if (UseMembar) {
-    __ fence();
-  }
-
   //=============================================================================
   // Call the native method. Argument registers must not have been
   // overwritten since "__ call_stub(signature_handler);" (except for
@@ -1594,9 +1590,6 @@
   __ li(R0/*thread_state*/, _thread_in_Java);
   __ release();
   __ stw(R0/*thread_state*/, thread_(thread_state));
-  if (UseMembar) {
-    __ fence();
-  }
 
   if (CheckJNICalls) {
     // clear_pending_jni_exception_check
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2224,6 +2224,7 @@
   if (is_static) {
     __ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
     __ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
+    __ resolve_oop_handle(Robj);
     // Acquire not needed here. Following access has an address dependency on this value.
   }
 }
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -107,13 +107,23 @@
   // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
   }
 
-  MaxVectorSize = 8;
+  if (PowerArchitecturePPC64 >= 8) {
+    if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
+      FLAG_SET_ERGO(bool, SuperwordUseVSX, true);
+    }
+  } else {
+    if (SuperwordUseVSX) {
+      warning("SuperwordUseVSX specified, but needs at least Power8.");
+      FLAG_SET_DEFAULT(SuperwordUseVSX, false);
+    }
+  }
+  MaxVectorSize = SuperwordUseVSX ? 16 : 8;
 #endif
 
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -130,7 +140,8 @@
                (has_mfdscr()  ? " mfdscr"  : ""),
                (has_vsx()     ? " vsx"     : ""),
                (has_ldbrx()   ? " ldbrx"   : ""),
-               (has_stdbrx()  ? " stdbrx"  : "")
+               (has_stdbrx()  ? " stdbrx"  : ""),
+               (has_vshasig() ? " sha"     : "")
                // Make sure number of %s matches num_features!
               );
   _features_string = os::strdup(buf);
@@ -200,7 +211,6 @@
   }
 
   // The AES intrinsic stubs require AES instruction support.
-#if defined(VM_LITTLE_ENDIAN)
   if (has_vcipher()) {
     if (FLAG_IS_DEFAULT(UseAES)) {
       UseAES = true;
@@ -221,18 +231,6 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
-#else
-  if (UseAES) {
-    warning("AES instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAES, false);
-  }
-  if (UseAESIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-  }
-#endif
-
   if (UseAESCTRIntrinsics) {
     warning("AES/CTR intrinsics are not available on this CPU");
     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@@ -247,17 +245,49 @@
     FLAG_SET_DEFAULT(UseFMA, true);
   }
 
-  if (UseSHA) {
-    warning("SHA instructions are not available on this CPU");
+  if (has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA)) {
+      UseSHA = true;
+    }
+  } else if (UseSHA) {
+    if (!FLAG_IS_DEFAULT(UseSHA))
+      warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
   }
-  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
-    warning("SHA intrinsics are not available on this CPU");
+
+  if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+    }
+  } else if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
+  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+    UseSquareToLenIntrinsic = true;
+  }
+  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+    UseMulAddIntrinsic = true;
+  }
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
   }
@@ -657,6 +687,7 @@
   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
   a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
   a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
+  a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[17] -> vshasig
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -708,6 +739,7 @@
   if (code[feature_cntr++]) features |= vsx_m;
   if (code[feature_cntr++]) features |= ldbrx_m;
   if (code[feature_cntr++]) features |= stdbrx_m;
+  if (code[feature_cntr++]) features |= vshasig_m;
 
   // Print the detection code.
   if (PrintAssembly) {
--- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -49,6 +49,7 @@
     vsx,
     ldbrx,
     stdbrx,
+    vshasig,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -64,6 +65,7 @@
     vand_m                = (1 << vand   ),
     lqarx_m               = (1 << lqarx  ),
     vcipher_m             = (1 << vcipher),
+    vshasig_m             = (1 << vshasig),
     vpmsumb_m             = (1 << vpmsumb),
     tcheck_m              = (1 << tcheck ),
     mfdscr_m              = (1 << mfdscr ),
@@ -106,6 +108,7 @@
   static bool has_vsx()     { return (_features & vsx_m) != 0; }
   static bool has_ldbrx()   { return (_features & ldbrx_m) != 0; }
   static bool has_stdbrx()  { return (_features & stdbrx_m) != 0; }
+  static bool has_vshasig() { return (_features & vshasig_m) != 0; }
   static bool has_mtfprd()  { return has_vpmsumb(); } // alias for P8
 
   // Assembler testing
--- a/src/hotspot/cpu/s390/assembler_s390.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -250,7 +250,6 @@
   bool is_RSform()  { return has_base() && !has_index() && is_disp12(); }
   bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
   bool is_RXform()  { return has_base() &&  has_index() && is_disp12(); }
-  bool is_RXEform() { return has_base() &&  has_index() && is_disp12(); }
   bool is_RXYform() { return has_base() &&  has_index() && is_disp20(); }
 
   bool uses(Register r) { return _base == r || _index == r; };
@@ -1093,7 +1092,201 @@
 #define TRTT_ZOPC   (unsigned  int)(0xb9 << 24 | 0x90 << 16)
 
 
-// Miscellaneous Operations
+//---------------------------
+//--  Vector Instructions  --
+//---------------------------
+
+//---<  Vector Support Instructions  >---
+
+//---  Load (memory)  ---
+
+#define VLM_ZOPC    (unsigned long)(0xe7L << 40 | 0x36L << 0)   // load full vreg range (n * 128 bit)
+#define VL_ZOPC     (unsigned long)(0xe7L << 40 | 0x06L << 0)   // load full vreg (128 bit)
+#define VLEB_ZOPC   (unsigned long)(0xe7L << 40 | 0x00L << 0)   // load vreg element (8 bit)
+#define VLEH_ZOPC   (unsigned long)(0xe7L << 40 | 0x01L << 0)   // load vreg element (16 bit)
+#define VLEF_ZOPC   (unsigned long)(0xe7L << 40 | 0x03L << 0)   // load vreg element (32 bit)
+#define VLEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x02L << 0)   // load vreg element (64 bit)
+
+#define VLREP_ZOPC  (unsigned long)(0xe7L << 40 | 0x05L << 0)   // load and replicate into all vector elements
+#define VLLEZ_ZOPC  (unsigned long)(0xe7L << 40 | 0x04L << 0)   // load logical element and zero.
+
+// vector register gather
+#define VGEF_ZOPC   (unsigned long)(0xe7L << 40 | 0x13L << 0)   // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
+#define VGEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x12L << 0)   // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
+// vector register scatter
+#define VSCEF_ZOPC  (unsigned long)(0xe7L << 40 | 0x1bL << 0)   // vector scatter element FW
+#define VSCEG_ZOPC  (unsigned long)(0xe7L << 40 | 0x1aL << 0)   // vector scatter element DW
+
+#define VLBB_ZOPC   (unsigned long)(0xe7L << 40 | 0x07L << 0)   // load vreg to block boundary (load to alignment).
+#define VLL_ZOPC    (unsigned long)(0xe7L << 40 | 0x37L << 0)   // load vreg with length.
+
+//---  Load (register)  ---
+
+#define VLR_ZOPC    (unsigned long)(0xe7L << 40 | 0x56L << 0)   // copy full vreg (128 bit)
+#define VLGV_ZOPC   (unsigned long)(0xe7L << 40 | 0x21L << 0)   // copy vreg element -> GR
+#define VLVG_ZOPC   (unsigned long)(0xe7L << 40 | 0x22L << 0)   // copy GR -> vreg element
+#define VLVGP_ZOPC  (unsigned long)(0xe7L << 40 | 0x62L << 0)   // copy GR2, GR3 (disjoint pair) -> vreg
+
+// vector register pack: halve the size of the source vector elements
+#define VPK_ZOPC    (unsigned long)(0xe7L << 40 | 0x94L << 0)   // just cut
+#define VPKS_ZOPC   (unsigned long)(0xe7L << 40 | 0x97L << 0)   // saturate as signed values
+#define VPKLS_ZOPC  (unsigned long)(0xe7L << 40 | 0x95L << 0)   // saturate as unsigned values
+
+// vector register unpack: double the size of the source vector elements
+#define VUPH_ZOPC   (unsigned long)(0xe7L << 40 | 0xd7L << 0)   // signed, left half of the source vector elements
+#define VUPLH_ZOPC  (unsigned long)(0xe7L << 40 | 0xd5L << 0)   // unsigned, left half of the source vector elements
+#define VUPL_ZOPC   (unsigned long)(0xe7L << 40 | 0xd6L << 0)   // signed, right half of the source vector elements
+#define VUPLL_ZOPC  (unsigned long)(0xe7L << 40 | 0xd4L << 0)   // unsigned, right half of the source vector elements
+
+// vector register merge
+#define VMRH_ZOPC   (unsigned long)(0xe7L << 40 | 0x61L << 0)   // register merge high (left half of source registers)
+#define VMRL_ZOPC   (unsigned long)(0xe7L << 40 | 0x60L << 0)   // register merge low (right half of source registers)
+
+// vector register permute
+#define VPERM_ZOPC  (unsigned long)(0xe7L << 40 | 0x8cL << 0)   // vector permute
+#define VPDI_ZOPC   (unsigned long)(0xe7L << 40 | 0x84L << 0)   // vector permute DW immediate
+
+// vector register replicate
+#define VREP_ZOPC   (unsigned long)(0xe7L << 40 | 0x4dL << 0)   // vector replicate
+#define VREPI_ZOPC  (unsigned long)(0xe7L << 40 | 0x45L << 0)   // vector replicate immediate
+#define VSEL_ZOPC   (unsigned long)(0xe7L << 40 | 0x8dL << 0)   // vector select
+
+#define VSEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x5fL << 0)   // vector sign-extend to DW (rightmost element in each DW).
+
+//---  Load (immediate)  ---
+
+#define VLEIB_ZOPC  (unsigned long)(0xe7L << 40 | 0x40L << 0)   // load vreg element (16 bit imm to 8 bit)
+#define VLEIH_ZOPC  (unsigned long)(0xe7L << 40 | 0x41L << 0)   // load vreg element (16 bit imm to 16 bit)
+#define VLEIF_ZOPC  (unsigned long)(0xe7L << 40 | 0x43L << 0)   // load vreg element (16 bit imm to 32 bit)
+#define VLEIG_ZOPC  (unsigned long)(0xe7L << 40 | 0x42L << 0)   // load vreg element (16 bit imm to 64 bit)
+
+//---  Store  ---
+
+#define VSTM_ZOPC   (unsigned long)(0xe7L << 40 | 0x3eL << 0)   // store full vreg range (n * 128 bit)
+#define VST_ZOPC    (unsigned long)(0xe7L << 40 | 0x0eL << 0)   // store full vreg (128 bit)
+#define VSTEB_ZOPC  (unsigned long)(0xe7L << 40 | 0x08L << 0)   // store vreg element (8 bit)
+#define VSTEH_ZOPC  (unsigned long)(0xe7L << 40 | 0x09L << 0)   // store vreg element (16 bit)
+#define VSTEF_ZOPC  (unsigned long)(0xe7L << 40 | 0x0bL << 0)   // store vreg element (32 bit)
+#define VSTEG_ZOPC  (unsigned long)(0xe7L << 40 | 0x0aL << 0)   // store vreg element (64 bit)
+#define VSTL_ZOPC   (unsigned long)(0xe7L << 40 | 0x3fL << 0)   // store vreg with length.
+
+//---  Misc  ---
+
+#define VGM_ZOPC    (unsigned long)(0xe7L << 40 | 0x46L << 0)   // generate bit  mask, [start..end] = '1', else '0'
+#define VGBM_ZOPC   (unsigned long)(0xe7L << 40 | 0x44L << 0)   // generate byte mask, bits(imm16) -> bytes
+
+//---<  Vector Arithmetic Instructions  >---
+
+// Load
+#define VLC_ZOPC    (unsigned long)(0xe7L << 40 | 0xdeL << 0)   // V1 := -V2,   element size = 2**m
+#define VLP_ZOPC    (unsigned long)(0xe7L << 40 | 0xdfL << 0)   // V1 := |V2|,  element size = 2**m
+
+// ADD
+#define VA_ZOPC     (unsigned long)(0xe7L << 40 | 0xf3L << 0)   // V1 := V2 + V3, element size = 2**m
+#define VACC_ZOPC   (unsigned long)(0xe7L << 40 | 0xf1L << 0)   // V1 := carry(V2 + V3), element size = 2**m
+
+// SUB
+#define VS_ZOPC     (unsigned long)(0xe7L << 40 | 0xf7L << 0)   // V1 := V2 - V3, element size = 2**m
+#define VSCBI_ZOPC  (unsigned long)(0xe7L << 40 | 0xf5L << 0)   // V1 := borrow(V2 - V3), element size = 2**m
+
+// MUL
+#define VML_ZOPC    (unsigned long)(0xe7L << 40 | 0xa2L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMH_ZOPC    (unsigned long)(0xe7L << 40 | 0xa3L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLH_ZOPC   (unsigned long)(0xe7L << 40 | 0xa1L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+#define VME_ZOPC    (unsigned long)(0xe7L << 40 | 0xa6L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLE_ZOPC   (unsigned long)(0xe7L << 40 | 0xa4L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+#define VMO_ZOPC    (unsigned long)(0xe7L << 40 | 0xa7L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLO_ZOPC   (unsigned long)(0xe7L << 40 | 0xa5L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+
+// MUL & ADD
+#define VMAL_ZOPC   (unsigned long)(0xe7L << 40 | 0xaaL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMAH_ZOPC   (unsigned long)(0xe7L << 40 | 0xabL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALH_ZOPC  (unsigned long)(0xe7L << 40 | 0xa9L << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+#define VMAE_ZOPC   (unsigned long)(0xe7L << 40 | 0xaeL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALE_ZOPC  (unsigned long)(0xe7L << 40 | 0xacL << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+#define VMAO_ZOPC   (unsigned long)(0xe7L << 40 | 0xafL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALO_ZOPC  (unsigned long)(0xe7L << 40 | 0xadL << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+
+// Vector SUM
+#define VSUM_ZOPC   (unsigned long)(0xe7L << 40 | 0x64L << 0)   // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
+#define VSUMG_ZOPC  (unsigned long)(0xe7L << 40 | 0x65L << 0)   // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
+#define VSUMQ_ZOPC  (unsigned long)(0xe7L << 40 | 0x67L << 0)   // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
+
+// Average
+#define VAVG_ZOPC   (unsigned long)(0xe7L << 40 | 0xf2L << 0)   // V1 := (V2+V3+1)/2, signed,   element size = 2**m
+#define VAVGL_ZOPC  (unsigned long)(0xe7L << 40 | 0xf0L << 0)   // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
+
+// VECTOR Galois Field Multiply Sum
+#define VGFM_ZOPC   (unsigned long)(0xe7L << 40 | 0xb4L << 0)
+#define VGFMA_ZOPC  (unsigned long)(0xe7L << 40 | 0xbcL << 0)
+
+//---<  Vector Logical Instructions  >---
+
+// AND
+#define VN_ZOPC     (unsigned long)(0xe7L << 40 | 0x68L << 0)   // V1 := V2 & V3,  element size = 2**m
+#define VNC_ZOPC    (unsigned long)(0xe7L << 40 | 0x69L << 0)   // V1 := V2 & ~V3, element size = 2**m
+
+// XOR
+#define VX_ZOPC     (unsigned long)(0xe7L << 40 | 0x6dL << 0)   // V1 := V2 ^ V3,  element size = 2**m
+
+// NOR
+#define VNO_ZOPC    (unsigned long)(0xe7L << 40 | 0x6bL << 0)   // V1 := !(V2 | V3),  element size = 2**m
+
+// OR
+#define VO_ZOPC     (unsigned long)(0xe7L << 40 | 0x6aL << 0)   // V1 := V2 | V3,  element size = 2**m
+
+// Comparison (element-wise)
+#define VCEQ_ZOPC   (unsigned long)(0xe7L << 40 | 0xf8L << 0)   // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
+#define VCH_ZOPC    (unsigned long)(0xe7L << 40 | 0xfbL << 0)   // V1 := (V2  > V3) ? 0xffff : 0x0000, element size = 2**m, signed
+#define VCHL_ZOPC   (unsigned long)(0xe7L << 40 | 0xf9L << 0)   // V1 := (V2  > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned
+
+// Max/Min (element-wise)
+#define VMX_ZOPC    (unsigned long)(0xe7L << 40 | 0xffL << 0)   // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
+#define VMXL_ZOPC   (unsigned long)(0xe7L << 40 | 0xfdL << 0)   // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
+#define VMN_ZOPC    (unsigned long)(0xe7L << 40 | 0xfeL << 0)   // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
+#define VMNL_ZOPC   (unsigned long)(0xe7L << 40 | 0xfcL << 0)   // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
+
+// Leading/Trailing Zeros, population count
+#define VCLZ_ZOPC   (unsigned long)(0xe7L << 40 | 0x53L << 0)   // V1 := leadingzeros(V2),  element size = 2**m
+#define VCTZ_ZOPC   (unsigned long)(0xe7L << 40 | 0x52L << 0)   // V1 := trailingzeros(V2), element size = 2**m
+#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0)   // V1 := popcount(V2), bytewise!!
+
+// Rotate/Shift
+#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0)   // V1 := rotateleft(V2), rotate count in V3 element
+#define VERLL_ZOPC  (unsigned long)(0xe7L << 40 | 0x33L << 0)   // V1 := rotateleft(V3), rotate count from d2(b2).
+#define VERIM_ZOPC  (unsigned long)(0xe7L << 40 | 0x72L << 0)   // Rotate then insert under mask. Read Principles of Operation!!
+
+#define VESLV_ZOPC  (unsigned long)(0xe7L << 40 | 0x70L << 0)   // V1 := SLL(V2, V3), unsigned, element-wise
+#define VESL_ZOPC   (unsigned long)(0xe7L << 40 | 0x30L << 0)   // V1 := SLL(V3), unsigned, shift count from d2(b2).
+
+#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0)   // V1 := SRA(V2, V3), signed, element-wise
+#define VESRA_ZOPC  (unsigned long)(0xe7L << 40 | 0x3AL << 0)   // V1 := SRA(V3), signed, shift count from d2(b2).
+#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0)   // V1 := SRL(V2, V3), unsigned, element-wise
+#define VESRL_ZOPC  (unsigned long)(0xe7L << 40 | 0x38L << 0)   // V1 := SRL(V3), unsigned, shift count from d2(b2).
+
+#define VSL_ZOPC    (unsigned long)(0xe7L << 40 | 0x74L << 0)   // V1 := SLL(V2), unsigned, bit-count
+#define VSLB_ZOPC   (unsigned long)(0xe7L << 40 | 0x75L << 0)   // V1 := SLL(V2), unsigned, byte-count
+#define VSLDB_ZOPC  (unsigned long)(0xe7L << 40 | 0x77L << 0)   // V1 := SLL((V2,V3)), unsigned, byte-count
+
+#define VSRA_ZOPC   (unsigned long)(0xe7L << 40 | 0x7eL << 0)   // V1 := SRA(V2), signed, bit-count
+#define VSRAB_ZOPC  (unsigned long)(0xe7L << 40 | 0x7fL << 0)   // V1 := SRA(V2), signed, byte-count
+#define VSRL_ZOPC   (unsigned long)(0xe7L << 40 | 0x7cL << 0)   // V1 := SRL(V2), unsigned, bit-count
+#define VSRLB_ZOPC  (unsigned long)(0xe7L << 40 | 0x7dL << 0)   // V1 := SRL(V2), unsigned, byte-count
+
+// Test under Mask
+#define VTM_ZOPC    (unsigned long)(0xe7L << 40 | 0xd8L << 0)   // Like TM, set CC according to state of selected bits.
+
+//---<  Vector String Instructions  >---
+#define VFAE_ZOPC   (unsigned long)(0xe7L << 40 | 0x82L << 0)   // Find any element
+#define VFEE_ZOPC   (unsigned long)(0xe7L << 40 | 0x80L << 0)   // Find element equal
+#define VFENE_ZOPC  (unsigned long)(0xe7L << 40 | 0x81L << 0)   // Find element not equal
+#define VSTRC_ZOPC  (unsigned long)(0xe7L << 40 | 0x8aL << 0)   // String range compare
+#define VISTR_ZOPC  (unsigned long)(0xe7L << 40 | 0x5cL << 0)   // Isolate String
+
+
+//--------------------------------
+//--  Miscellaneous Operations  --
+//--------------------------------
 
 // Execute
 #define EX_ZOPC     (unsigned  int)(68L << 24)
@@ -1244,10 +1437,18 @@
     // unsigned arithmetic calculation instructions
     // Mask bit#0 is not used by these instructions.
     // There is no indication of overflow for these instr.
-    bcondLogZero             =  2,
-    bcondLogNotZero          =  5,
+    bcondLogZero_NoCarry     =  8,
+    bcondLogZero_Carry       =  2,
+    // bcondLogZero_Borrow      =  8,  // This CC is never generated.
+    bcondLogZero_NoBorrow    =  2,
+    bcondLogZero             =  bcondLogZero_Carry | bcondLogZero_NoCarry,
+    bcondLogNotZero_NoCarry  =  4,
+    bcondLogNotZero_Carry    =  1,
     bcondLogNotZero_Borrow   =  4,
     bcondLogNotZero_NoBorrow =  1,
+    bcondLogNotZero          =  bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
+    bcondLogCarry            =  bcondLogZero_Carry | bcondLogNotZero_Carry,
+    bcondLogBorrow           =  /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
     // string search instructions
     bcondFound       =  4,
     bcondNotFound    =  2,
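For readers unfamiliar with the encoding: the bcond* values are 4-bit branch masks with one bit per condition code; assuming the usual mapping (8 selects CC0, 4 selects CC1, 2 selects CC2, 1 selects CC3), bcondLogZero_NoCarry picks CC0 and bcondLogNotZero_Carry picks CC3, so a composite such as bcondLogCarry branches whenever a carry was produced. A tiny sketch of that reading, an illustration of the assumed mapping rather than assembler code:

#include <cassert>

// Branch-mask bit for condition code cc, assuming mask value 8 selects CC0 ... 1 selects CC3.
static bool branch_taken(int mask4, int cc) { return (mask4 >> (3 - cc)) & 1; }

int main() {
  const int bcondLogCarry = 2 | 1;               // CC2 or CC3 after an add-logical
  assert( branch_taken(bcondLogCarry, 2));
  assert( branch_taken(bcondLogCarry, 3));
  assert(!branch_taken(bcondLogCarry, 0));
  return 0;
}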
@@ -1280,6 +1481,29 @@
     to_minus_infinity = 7
   };
 
+  // Vector Register Element Type.
+  enum VRegElemType {
+    VRET_BYTE   = 0,
+    VRET_HW     = 1,
+    VRET_FW     = 2,
+    VRET_DW     = 3,
+    VRET_QW     = 4
+  };
+
+  // Vector Operation Result Control.
+  //   This is a set of flags used in some vector instructions to control
+  //   the result (side) effects of instruction execution.
+  enum VOpRC {
+    VOPRC_CCSET    = 0b0001, // set the CC.
+    VOPRC_CCIGN    = 0b0000, // ignore, don't set CC.
+    VOPRC_ZS       = 0b0010, // Zero Search. Additional element-wise comparison against zero.
+    VOPRC_NOZS     = 0b0000, // No Zero Search.
+    VOPRC_RTBYTEIX = 0b0100, // generate the byte index of the lowest element with a true comparison.
+    VOPRC_RTBITVEC = 0b0000, // generate bit vector, all 1s for true, all 0s for false element comparisons.
+    VOPRC_INVERT   = 0b1000, // invert comparison results.
+    VOPRC_NOINVERT = 0b0000  // use comparison results as is, do not invert.
+  };
+
   // Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
   static branch_condition inverse_condition(branch_condition cc);
   static branch_condition inverse_float_condition(branch_condition cc);
@@ -1376,6 +1600,65 @@
     return r;
   }
 
+  static int64_t rsmask_48( Address a) { assert(a.is_RSform(),  "bad address format"); return rsmask_48( a.disp12(), a.base()); }
+  static int64_t rxmask_48( Address a) {      if (a.is_RXform())  { return rxmask_48( a.disp12(), a.index(), a.base()); }
+                                         else if (a.is_RSform())  { return rsmask_48( a.disp12(),            a.base()); }
+                                         else                     { guarantee(false, "bad address format");  return 0;  }
+                                       }
+  static int64_t rsymask_48(Address a) { assert(a.is_RSYform(), "bad address format"); return rsymask_48(a.disp20(), a.base()); }
+  static int64_t rxymask_48(Address a) {      if (a.is_RXYform()) { return rxymask_48( a.disp20(), a.index(), a.base()); }
+                                         else if (a.is_RSYform()) { return rsymask_48( a.disp20(),            a.base()); }
+                                         else                     { guarantee(false, "bad address format");  return 0;   }
+                                       }
+
+  static int64_t rsmask_48( int64_t d2, Register b2)              { return uimm12(d2, 20, 48)                   | regz(b2, 16, 48); }
+  static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
+  static int64_t rsymask_48(int64_t d2, Register b2)              { return simm20(d2)                           | regz(b2, 16, 48); }
+  static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48); }
+
+  // Address calculated from d12(vx,b) - vx is vector index register.
+  static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }
+
+  static int64_t vreg_mask(VectorRegister v, int pos) {
+    return vreg(v, pos) | v->RXB_mask(pos);
+  }
+
+  // Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
+  static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
+    // min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
+    // max_size - maximum element size. Not all instructions support element sizes up to "QW".
+    assert((min_size <= size) && (size <= max_size), "element size control out of range");
+    return uimm4(size, pos, 48);
+  }
+
+  // Vector Element IndeX. 4-bit field which indexes the target vector element.
+  static int64_t veix_mask(int64_t ix, int el_size, int pos) {
+    // el_size - size of the vector element. This is a VRegElemType enum value.
+    // ix      - vector element index.
+    int max_ix = -1;
+    switch (el_size) {
+      case VRET_BYTE: max_ix = 15; break;
+      case VRET_HW:   max_ix =  7; break;
+      case VRET_FW:   max_ix =  3; break;
+      case VRET_DW:   max_ix =  1; break;
+      case VRET_QW:   max_ix =  0; break;
+      default:        guarantee(false, "bad vector element size %d", el_size); break;
+    }
+    assert((0 <= ix) && (ix <= max_ix), "element index out of range (0 <= %ld <= %d)", ix, max_ix);
+    return uimm4(ix, pos, 48);
+  }
+
+  // Vector Operation Result Control. 4-bit field.
+  static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0b1111) {
+    assert((flags & allowed_flags) == flags, "Invalid VOPRC_* flag combination: %d", (int)flags);
+    return uimm4(flags, pos, 48);
+  }
+
+  // Vector Operation Result Control. Condition code setting.
+  static int64_t voprc_ccmask(int64_t flags, int pos) {
+    return voprc_any(flags, pos, VOPRC_CCIGN | VOPRC_CCSET);
+  }
+
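A small self-contained sketch of the field-placement convention these helpers rely on, derived from the imm24/imm32 pattern in the next hunk: a w-bit field whose leftmost bit sits at position s of a 48-bit instruction word is shifted left by (48 - s - w). The helper name and the example position are illustrative, not part of the assembler's API:

#include <cstdint>
#include <cassert>

// Place a w-bit field at bit offset s (counted from the left) of a len-bit instruction word.
static long place_field(uint64_t value, int width, int s, int len = 48) {
  assert(width > 0 && s + width <= len);
  return (long)((value & ((1ull << width) - 1)) << (len - s - width));
}

// e.g. a 4-bit element-size control such as VRET_FW (== 2) placed at a
// hypothetical position 32 of a 48-bit vector instruction:
//   long esc = place_field(2, 4, 32);   // == 2 << 12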
  public:
 
   //--------------------------------------------------
@@ -1453,6 +1736,8 @@
   static long imm24(int64_t i24, int s, int len)   { return imm(i24, 24) << (len-s-24); }
   static long imm32(int64_t i32, int s, int len)   { return imm(i32, 32) << (len-s-32); }
 
+  static long vreg(VectorRegister v, int pos)      { const int len = 48; return u_field(v->encoding()&0x0f, (len-pos)-1, (len-pos)-4) | v->RXB_mask(pos); }
+
   static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
   static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
 
@@ -1840,13 +2125,16 @@
   inline void z_alsi( const Address& d, int64_t i2);              // add logical   *(d) += i2_imm8           ; uint32  -- z10
   inline void z_algsi(const Address& d, int64_t i2);              // add logical   *(d) += i2_imm8           ; uint64  -- z10
 
-  // negate
+  // sign adjustment
   inline void z_lcr(  Register r1, Register r2 = noreg);              // neg r1 = -r2   ; int32
   inline void z_lcgr( Register r1, Register r2 = noreg);              // neg r1 = -r2   ; int64
   inline void z_lcgfr(Register r1, Register r2);                      // neg r1 = -r2   ; int64 <- int32
   inline void z_lnr(  Register r1, Register r2 = noreg);              // neg r1 = -|r2| ; int32
   inline void z_lngr( Register r1, Register r2 = noreg);              // neg r1 = -|r2| ; int64
   inline void z_lngfr(Register r1, Register r2);                      // neg r1 = -|r2| ; int64 <- int32
+  inline void z_lpr(  Register r1, Register r2 = noreg);              //     r1 =  |r2| ; int32
+  inline void z_lpgr( Register r1, Register r2 = noreg);              //     r1 =  |r2| ; int64
+  inline void z_lpgfr(Register r1, Register r2);                      //     r1 =  |r2| ; int64 <- int32
 
   // subtract instructions
   // sub registers
@@ -2125,6 +2413,422 @@
   inline void z_trtt(Register r1, Register r2, int64_t m3);
 
 
+  //---------------------------
+  //--  Vector Instructions  --
+  //---------------------------
+
+  //---<  Vector Support Instructions  >---
+
+  // Load (transfer from memory)
+  inline void z_vlm(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vl(    VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vleb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vleh(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vlef(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vleg(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+
+  // Gather/Scatter
+  inline void z_vgef(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+  inline void z_vgeg(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+  inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+  inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+  // load and replicate
+  inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+  inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+  inline void z_vlbb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vll(   VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  // Load (register to register)
+  inline void z_vlr(   VectorRegister v1, VectorRegister v2);
+
+  inline void z_vlgv(  Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
+  inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vlvg(  VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
+  inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
+
+  // vector register pack
+  inline void z_vpk(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vpkh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vpks(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vpkls(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // vector register unpack (sign-extended)
+  inline void z_vuph(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuphb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuphh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuphf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vupl(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuplb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuplh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuplf(  VectorRegister v1, VectorRegister v2);
+
+  // vector register unpack (zero-extended)
+  inline void z_vuplh(  VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
+  inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
+  inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
+  inline void z_vupll(  VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vupllb( VectorRegister v1, VectorRegister v2);
+  inline void z_vupllh( VectorRegister v1, VectorRegister v2);
+  inline void z_vupllf( VectorRegister v1, VectorRegister v2);
+
+  // vector register merge high/low
+  inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // vector register permute
+  inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vpdi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t        m4);
+
+  // vector register replicate
+  inline void z_vrep(  VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
+  inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepi( VectorRegister v1, int64_t imm2,      int64_t m3);
+  inline void z_vrepib(VectorRegister v1, int64_t imm2);
+  inline void z_vrepih(VectorRegister v1, int64_t imm2);
+  inline void z_vrepif(VectorRegister v1, int64_t imm2);
+  inline void z_vrepig(VectorRegister v1, int64_t imm2);
+
+  inline void z_vsel(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vseg(  VectorRegister v1, VectorRegister v2, int64_t imm3);
+
+  // Load (immediate)
+  inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
+
+  // Store
+  inline void z_vstm(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vst(   VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vstl(  VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  // Misc
+  inline void z_vgm(   VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
+  inline void z_vgmb(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmh(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmf(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmg(  VectorRegister v1, int64_t imm2, int64_t imm3);
+
+  inline void z_vgbm(  VectorRegister v1, int64_t imm2);
+  inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
+  inline void z_vone(  VectorRegister v1); // preferred method to set vreg to all ones
+
+  //---<  Vector Arithmetic Instructions  >---
+
+  // Load
+  inline void z_vlc(    VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vlcb(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlch(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlcf(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlcg(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlp(    VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vlpb(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlph(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlpf(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlpg(   VectorRegister v1, VectorRegister v2);
+
+  // ADD
+  inline void z_va(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vab(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vah(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaf(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vag(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaq(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vacc(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vaccb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vacch(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccq(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // SUB
+  inline void z_vs(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsb(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsh(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsf(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsg(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsq(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // MULTIPLY
+  inline void z_vml(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmh(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmlh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vme(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmle(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmo(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmlo(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+
+  // MULTIPLY & ADD
+  inline void z_vmal(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmah(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmalh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmale(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmao(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmalo(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+
+  // VECTOR SUM
+  inline void z_vsum(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumg(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumq(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Average
+  inline void z_vavg(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vavgb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgl(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // VECTOR Galois Field Multiply Sum
+  inline void z_vgfm(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vgfmb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  // VECTOR Galois Field Multiply Sum and Accumulate
+  inline void z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+
+  //---<  Vector Logical Instructions  >---
+
+  // AND
+  inline void z_vn(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vnc(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // XOR
+  inline void z_vx(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // NOR
+  inline void z_vno(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // OR
+  inline void z_vo(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Comparison (element-wise)
+  inline void z_vceq(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vceqb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vch(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vchb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchg(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchbs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchhs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchfs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchgs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vchlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Max/Min (element-wise)
+  inline void z_vmx(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmxb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxg(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmxlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmn(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmnb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmng(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmnlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Leading/Trailing Zeros, population count
+  inline void z_vclz(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vclzb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzg(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctz(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vctzb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzg(  VectorRegister v1, VectorRegister v2);
+  inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
+
+  // Rotate/Shift
+  inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verll(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verim(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
+  inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+
+  inline void z_veslv(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesl(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_veslb(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslh(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslf(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslg(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesra(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrl(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vsl(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vslb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsldb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+
+  inline void z_vsra(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrab(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Test under Mask
+  inline void z_vtm(    VectorRegister v1, VectorRegister v2);
+
+  //---<  Vector String Instructions  >---
+  inline void z_vfae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5);   // Find any element
+  inline void z_vfaeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfaeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfaef(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfee(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5);   // Find element equal
+  inline void z_vfeeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfeeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfeef(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfene(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5);   // Find element not equal
+  inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+  inline void z_vstrc(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6);   // String range compare
+  inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+  inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+  inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+  inline void z_vistr(  VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5);                      // Isolate String
+  inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
+  inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
+  inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
+  inline void z_vistrbs(VectorRegister v1, VectorRegister v2);
+  inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
+  inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
+
+
   // Floating point instructions
   // ==========================
 
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -309,6 +309,9 @@
 inline void Assembler::z_lnr(  Register r1, Register r2) { emit_16( LNR_ZOPC   | regt( r1,  8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
 inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC  | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
 inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lpr(  Register r1, Register r2) { emit_16( LPR_ZOPC   | regt( r1,  8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
+inline void Assembler::z_lpgr( Register r1, Register r2) { emit_32( LPGR_ZOPC  | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lpgfr(Register r1, Register r2) { emit_32( LPGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
 
 inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC  | regt(r1, 24, 32) | reg(r2, 28, 32)); }
 inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@@ -702,6 +705,421 @@
 inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
 
 
+//---------------------------
+//--  Vector Instructions  --
+//---------------------------
+
+//---<  Vector Support Instructions  >---
+
+// Load (transfer from memory)
+inline void Assembler::z_vlm(    VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {emit_48(VLM_ZOPC   | vreg(v1,  8)     | vreg(v3, 12)     | rsmask_48(d2,     b2)); }
+inline void Assembler::z_vl(     VectorRegister v1, int64_t d2, Register x2, Register b2)             {emit_48(VL_ZOPC    | vreg(v1,  8)                        | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vleb(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vleh(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vlef(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vleg(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+// Gather/Scatter
+inline void Assembler::z_vgef(   VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC  | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vgeg(   VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC  | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+inline void Assembler::z_vscef(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vsceg(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+// load and replicate
+inline void Assembler::z_vlrep(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
+inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_HW); }  // load HW   and replicate to all vector elements of type 'H'
+inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_FW); }  // load FW   and replicate to all vector elements of type 'F'
+inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_DW); }  // load DW   and replicate to all vector elements of type 'G'
+
+inline void Assembler::z_vllez(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_HW); }  // load logical HW   into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_FW); }  // load logical FW   into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_DW); }  // load logical DW   into left DW of VR, zero all other bit positions.
+
+inline void Assembler::z_vlbb(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_vll(    VectorRegister v1, Register r3, int64_t d2, Register b2)             {emit_48(VLL_ZOPC   | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2)); }
+
+// Load (register to register)
+inline void Assembler::z_vlr (   VectorRegister v1, VectorRegister v2)                                {emit_48(VLR_ZOPC   | vreg(v1,  8)     | vreg(v2, 12)); }
+
+inline void Assembler::z_vlgv(   Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC  |  reg(r1,  8, 48) | vreg(v3, 12)     | rsmask_48(d2,     b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlgvb(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvh(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_HW); }   // load HW   from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvf(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_FW); }   // load FW   from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvg(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_DW); }   // load DW   from VR element (index d2(b2)) into GR.
+
+inline void Assembler::z_vlvg(   VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC  | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlvgb(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
+inline void Assembler::z_vlvgh(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_HW); }
+inline void Assembler::z_vlvgf(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_FW); }
+inline void Assembler::z_vlvgg(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_DW); }
+
+inline void Assembler::z_vlvgp(  VectorRegister v1, Register r2, Register r3)                         {emit_48(VLVGP_ZOPC | vreg(v1,  8)     |  reg(r2, 12, 48) |  reg(r3, 16, 48)); }
+
+// vector register pack
+inline void Assembler::z_vpk(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC   | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
+inline void Assembler::z_vpkh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vpkf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vpkg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+
+inline void Assembler::z_vpks(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vpksh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); }   // vector element type 'H', don't set CC
+inline void Assembler::z_vpksf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); }   // vector element type 'F', don't set CC
+inline void Assembler::z_vpksg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); }   // vector element type 'G', don't set CC
+inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); }   // vector element type 'H', set CC
+inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); }   // vector element type 'F', set CC
+inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); }   // vector element type 'G', set CC
+
+inline void Assembler::z_vpkls(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); }  // vector element type 'H', set CC
+inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); }  // vector element type 'F', set CC
+inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); }  // vector element type 'G', set CC
+
+// vector register unpack (sign-extended)
+inline void Assembler::z_vuph(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPH_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuphb(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vuphh(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vuphf(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vupl(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPL_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuplb(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vuplh(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vuplf(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_FW); }          // vector element type 'F'
+
+// vector register unpack (zero-extended)
+inline void Assembler::z_vuplh(  VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPLH_ZOPC | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_BYTE); }       // vector element type 'B'
+inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_HW); }         // vector element type 'H'
+inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_FW); }         // vector element type 'F'
+inline void Assembler::z_vupll(  VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPLL_ZOPC | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_BYTE); }       // vector element type 'B'
+inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_HW); }         // vector element type 'H'
+inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_FW); }         // vector element type 'F'
+
+// vector register merge high/low
+inline void Assembler::z_vmrh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrhb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmrhh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmrhf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmrhg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+inline void Assembler::z_vmrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrl(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmrlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrl(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmrlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrl(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmrlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrl(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+// vector register permute
+inline void Assembler::z_vperm(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
+inline void Assembler::z_vpdi(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t        m4) {emit_48(VPDI_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
+
+// vector register replicate
+inline void Assembler::z_vrep(   VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4)      {emit_48(VREP_ZOPC  | vreg(v1,  8)     | vreg(v3, 12)     | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepb(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vreph(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vrepf(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vrepg(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vrepi(  VectorRegister v1, int64_t imm2,      int64_t m3)                    {emit_48(VREPI_ZOPC | vreg(v1,  8)                        | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_DW); }       // vector element type 'G'
+
+inline void Assembler::z_vsel(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) |  vreg(v3, 16) |  vreg(v4, 32)); }
+inline void Assembler::z_vseg(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VSEG_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | uimm4(m3, 32, 48)); }
+
+// Load (immediate)
+inline void Assembler::z_vleib(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIB_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vleih(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIH_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vleif(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIF_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vleig(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIG_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_DW,   32)); }
+
+// Store
+inline void Assembler::z_vstm(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {emit_48(VSTM_ZOPC  | vreg(v1,  8)     | vreg(v3, 12)     | rsmask_48(d2,     b2)); }
+inline void Assembler::z_vst(    VectorRegister v1, int64_t d2, Register x2, Register b2)             {emit_48(VST_ZOPC   | vreg(v1,  8)                        | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vsteb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vsteh(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vstef(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vsteg(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW,   32)); }
+inline void Assembler::z_vstl(   VectorRegister v1, Register r3, int64_t d2, Register b2)             {emit_48(VSTL_ZOPC  | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2)); }
+
+// Misc
+inline void Assembler::z_vgm(    VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4)           {emit_48(VGM_ZOPC   | vreg(v1,  8)     | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgmb(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgmh(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_HW); }   // vector element type 'H'
+inline void Assembler::z_vgmf(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_FW); }   // vector element type 'F'
+inline void Assembler::z_vgmg(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_DW); }   // vector element type 'G'
+
+inline void Assembler::z_vgbm(   VectorRegister v1, int64_t imm2)                                     {emit_48(VGBM_ZOPC  | vreg(v1,  8)     | uimm16(imm2, 16, 48)); }
+inline void Assembler::z_vzero(  VectorRegister v1)                                                   {z_vgbm(v1, 0); }      // preferred method to set vreg to all zeroes
+inline void Assembler::z_vone(   VectorRegister v1)                                                   {z_vgbm(v1, 0xffff); } // preferred method to set vreg to all ones
+
+//---<  Vector Arithmetic Instructions  >---
+
+// Load
+inline void Assembler::z_vlc(    VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VLC_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlcb(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_BYTE); }         // vector element type 'B'
+inline void Assembler::z_vlch(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_HW); }           // vector element type 'H'
+inline void Assembler::z_vlcf(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_FW); }           // vector element type 'F'
+inline void Assembler::z_vlcg(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_DW); }           // vector element type 'G'
+inline void Assembler::z_vlp(    VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VLP_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlpb(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_BYTE); }         // vector element type 'B'
+inline void Assembler::z_vlph(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_HW); }           // vector element type 'H'
+inline void Assembler::z_vlpf(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_FW); }           // vector element type 'F'
+inline void Assembler::z_vlpg(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_DW); }           // vector element type 'G'
+
+// ADD
+inline void Assembler::z_va(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VA_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vab(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_BYTE); }      // vector element type 'B'
+inline void Assembler::z_vah(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_HW); }        // vector element type 'H'
+inline void Assembler::z_vaf(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_FW); }        // vector element type 'F'
+inline void Assembler::z_vag(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_DW); }        // vector element type 'G'
+inline void Assembler::z_vaq(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_QW); }        // vector element type 'Q'
+inline void Assembler::z_vacc(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VACC_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vaccb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vacch(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vaccf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vaccg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vaccq(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_QW); }      // vector element type 'Q'
+
+// SUB
+inline void Assembler::z_vs(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vsb(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_BYTE); }      // vector element type 'B'
+inline void Assembler::z_vsh(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_HW); }        // vector element type 'H'
+inline void Assembler::z_vsf(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_FW); }        // vector element type 'F'
+inline void Assembler::z_vsg(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_DW); }        // vector element type 'G'
+inline void Assembler::z_vsq(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_QW); }        // vector element type 'Q'
+inline void Assembler::z_vscbi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSCBI_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_QW); }     // vector element type 'Q'
+
+// MULTIPLY
+inline void Assembler::z_vml(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmh(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vme(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmle(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmo(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlo(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+
+// MULTIPLY & ADD
+inline void Assembler::z_vmal(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmah(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmale(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmao(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalo(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+
+// VECTOR SUM
+inline void Assembler::z_vsum(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
+inline void Assembler::z_vsumb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsum(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vsumh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsum(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vsumg(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW,   VRET_FW, 32)); }
+inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumg(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumg(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vsumq(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW,   VRET_DW, 32)); }
+inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumq(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumq(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+
+// Average
+inline void Assembler::z_vavg(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVG_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavgb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vavgh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vavgf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vavgg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vavgl(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVGL_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+
+// VECTOR Galois Field Multiply Sum
+inline void Assembler::z_vgfm(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VGFM_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgfmb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vgfmh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vgfmf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vgfmg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VGFMA_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20)); }
+inline void Assembler::z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_HW); }   // vector element type 'H'
+inline void Assembler::z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_FW); }   // vector element type 'F'
+inline void Assembler::z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_DW); }   // vector element type 'G'
+
+//---<  Vector Logical Instructions  >---
+
+// AND
+inline void Assembler::z_vn(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VN_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vnc(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VNC_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// XOR
+inline void Assembler::z_vx(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VX_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// NOR
+inline void Assembler::z_vno(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VNO_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// OR
+inline void Assembler::z_vo(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VO_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Comparison (element-wise)
+inline void Assembler::z_vceq(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vceqb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
+inline void Assembler::z_vceqh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_HW,   VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vceqf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_FW,   VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vceqg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_DW,   VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
+inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_HW,   VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_FW,   VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_DW,   VOPRC_CCSET); } // vector element type 'G', set CC
+inline void Assembler::z_vch(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vchb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_BYTE,  VOPRC_CCIGN); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_HW,    VOPRC_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_FW,    VOPRC_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_DW,    VOPRC_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vchbs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_BYTE,  VOPRC_CCSET); }  // vector element type 'B', set CC
+inline void Assembler::z_vchhs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_HW,    VOPRC_CCSET); }  // vector element type 'H', set CC
+inline void Assembler::z_vchfs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_FW,    VOPRC_CCSET); }  // vector element type 'F', set CC
+inline void Assembler::z_vchgs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_DW,    VOPRC_CCSET); }  // vector element type 'G', set CC
+inline void Assembler::z_vchl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vchlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_HW,   VOPRC_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_FW,   VOPRC_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_DW,   VOPRC_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); }  // vector element type 'B', set CC
+inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_HW,   VOPRC_CCSET); }  // vector element type 'H', set CC
+inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_FW,   VOPRC_CCSET); }  // vector element type 'F', set CC
+inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_DW,   VOPRC_CCSET); }  // vector element type 'G', set CC
+
+// Max/Min (element-wise)
+inline void Assembler::z_vmx(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vmxh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vmxf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vmxg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+inline void Assembler::z_vmxl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmxlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmxlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmxlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vmn(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vmnh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vmnf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vmng(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+inline void Assembler::z_vmnl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmnlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmnlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmnlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+// Leading/Trailing Zeros, population count
+inline void Assembler::z_vclz(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VCLZ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vclzb(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vclzh(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vclzf(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vclzg(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_DW); }          // vector element type 'G'
+inline void Assembler::z_vctz(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VCTZ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vctzb(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vctzh(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vctzf(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vctzg(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_DW); }          // vector element type 'G'
+inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VPOPCT_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+
+// Rotate/Shift
+inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_verll(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+inline void Assembler::z_verim(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERIM_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); }     // vector element type 'G'
+
+inline void Assembler::z_veslv(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+inline void Assembler::z_vesl(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESL_ZOPC  | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslb(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
+inline void Assembler::z_veslh(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_HW);}   // vector element type 'H'
+inline void Assembler::z_veslf(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_FW);}   // vector element type 'F'
+inline void Assembler::z_veslg(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_DW);}   // vector element type 'G'
+
+inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESRAV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vesra(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESRLV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vesrl(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+
+inline void Assembler::z_vsl(    VectorRegister v1, VectorRegister v2, VectorRegister v3)               {emit_48(VSL_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vslb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)               {emit_48(VSLB_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsldb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
+
+inline void Assembler::z_vsra(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRA_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrab(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRAB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRLB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Test under Mask
+inline void Assembler::z_vtm(    VectorRegister v1, VectorRegister v2)                                {emit_48(VTM_ZOPC   | vreg(v1,  8) | vreg(v2, 12)); }
+
+//---<  Vector String Instructions  >---
+inline void Assembler::z_vfae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); }  // Find any element
+inline void Assembler::z_vfaeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfaeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW,   cc5); }
+inline void Assembler::z_vfaef(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW,   cc5); }
+inline void Assembler::z_vfee(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); }  // Find element equal
+inline void Assembler::z_vfeeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfeeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW,   cc5); }
+inline void Assembler::z_vfeef(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW,   cc5); }
+inline void Assembler::z_vfene(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); }  // Find element not equal
+inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW,   cc5); }
+inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW,   cc5); }
+inline void Assembler::z_vstrc(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)     | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24) ); }  // String range compare
+inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4,               int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
+inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4,               int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW,   cc6); }
+inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4,               int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW,   cc6); }
+inline void Assembler::z_vistr(  VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); }  // isolate string
+inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2,               int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); }
+inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2,               int64_t cc5) {z_vistr(v1, v2, VRET_HW,   cc5); }
+inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2,               int64_t cc5) {z_vistr(v1, v2, VRET_FW,   cc5); }
+inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2)                            {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); }
+inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2)                            {z_vistr(v1, v2, VRET_HW,   VOPRC_CCSET); }
+inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2)                            {z_vistr(v1, v2, VRET_FW,   VOPRC_CCSET); }
+
+
 //-------------------------------
 // FLOAT INSTRUCTIONS
 //-------------------------------
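The vector emitters above all follow one pattern: OR the instruction's opcode constant with 4-bit register fields and mask fields placed at fixed positions of a 48-bit instruction word, where bit 0 is the leftmost bit. The standalone sketch below illustrates that field placement only; the opcode value and the helper name field() are assumptions for illustration, and the RXB extension bits that vreg() adds for registers 16..31 are intentionally omitted here.

    #include <cstdint>
    #include <cstdio>

    // Place 'value' so that its most significant bit lands at instruction bit
    // 'bitpos', counting bit 0 as the leftmost bit of a 48-bit instruction.
    static uint64_t field(uint64_t value, int bitpos, int width) {
      return (value & ((1ULL << width) - 1)) << (48 - bitpos - width);
    }

    int main() {
      // Hypothetical opcode value for illustration only; the real *_ZOPC
      // constants live in assembler_s390.hpp and are not reproduced here.
      const uint64_t EXAMPLE_ZOPC = 0xE700000000F8ULL;
      uint64_t insn = EXAMPLE_ZOPC
                    | field(1, 8, 4)     // v1 register number in bits 8..11
                    | field(2, 12, 4)    // v2 register number in bits 12..15
                    | field(3, 16, 4);   // v3 register number in bits 16..19
      printf("0x%012llx\n", (unsigned long long)insn);
      return 0;
    }

Running this prints 0xe712300000f8, i.e. the three register numbers landing in bits 8..11, 12..15 and 16..19 of the 48-bit word.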
--- a/src/hotspot/cpu/s390/globals_s390.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/globals_s390.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 // Sorted according to sparc.
 
 // z/Architecture remembers branch targets, so don't share vtables.
-define_pd_global(bool,  ShareVtableStubs,            false);
+define_pd_global(bool,  ShareVtableStubs,            true);
 define_pd_global(bool,  NeedsDeoptSuspend,           false); // Only register window machines need this.
 
 define_pd_global(bool,  ImplicitNullChecks,          true);  // Generate code for implicit null checks.
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -4671,6 +4671,7 @@
   mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
   mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
   mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
+  resolve_oop_handle(mirror);
 }
 
 //---------------------------------------------------------------
--- a/src/hotspot/cpu/s390/register_definitions_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/register_definitions_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,3 +35,5 @@
 REGISTER_DEFINITION(Register, noreg);
 
 REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+REGISTER_DEFINITION(VectorRegister, vnoreg);
--- a/src/hotspot/cpu/s390/register_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/register_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,3 +46,13 @@
   };
   return is_valid() ? names[encoding()] : "fnoreg";
 }
+
+const char* VectorRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "Z_V0",  "Z_V1",  "Z_V2",  "Z_V3",  "Z_V4",  "Z_V5",  "Z_V6",  "Z_V7",
+    "Z_V8",  "Z_V9",  "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
+    "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
+    "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
+  };
+  return is_valid() ? names[encoding()] : "vnoreg";
+}
--- a/src/hotspot/cpu/s390/register_s390.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/register_s390.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,11 +34,6 @@
 
 typedef VMRegImpl* VMReg;
 
-// Use Register as shortcut.
-class RegisterImpl;
-typedef RegisterImpl* Register;
-
-// The implementation of integer registers for z/Architecture.
 
 // z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
 //
@@ -57,6 +52,17 @@
 //   f1,f3,f5,f7 General purpose (volatile)
 //   f8-f15      General purpose (nonvolatile)
 
+
+//===========================
+//===  Integer Registers  ===
+//===========================
+
+// Use Register as shortcut.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+// The implementation of integer registers for z/Architecture.
+
 inline Register as_Register(int encoding) {
   return (Register)(long)encoding;
 }
@@ -110,6 +116,11 @@
 CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
 CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
 
+
+//=============================
+//===  Condition Registers  ===
+//=============================
+
 // Use ConditionRegister as shortcut
 class ConditionRegisterImpl;
 typedef ConditionRegisterImpl* ConditionRegister;
@@ -159,7 +170,7 @@
 // dangers of defines.
 // If a particular file has a problem with these defines then it's possible
 // to turn them off in that file by defining
-// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
+// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
 // so that it's able to provide real definitions of these registers
 // for use in debuggers and such.
 
@@ -186,6 +197,11 @@
 #define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
 #endif // DONT_USE_REGISTER_DEFINES
 
+
+//=========================
+//===  Float Registers  ===
+//=========================
+
 // Use FloatRegister as shortcut
 class FloatRegisterImpl;
 typedef FloatRegisterImpl* FloatRegister;
@@ -263,22 +279,6 @@
 #define Z_F15 ((FloatRegister)(  Z_F15_FloatRegisterEnumValue))
 #endif // DONT_USE_REGISTER_DEFINES
 
-// Need to know the total number of registers of all sorts for SharedInfo.
-// Define a class that exports it.
-
-class ConcreteRegisterImpl : public AbstractRegisterImpl {
- public:
-  enum {
-    number_of_registers =
-      (RegisterImpl::number_of_registers +
-      FloatRegisterImpl::number_of_registers)
-      * 2 // register halves
-      + 1 // condition code register
-  };
-  static const int max_gpr;
-  static const int max_fpr;
-};
-
 // Single, Double and Quad fp reg classes. These exist to map the ADLC
 // encoding for a floating point register, to the FloatRegister number
 // desired by the macroassembler. A FloatRegister is a number between
@@ -329,6 +329,161 @@
 };
 
 
+//==========================
+//===  Vector Registers  ===
+//==========================
+
+// Use VectorRegister as shortcut
+class VectorRegisterImpl;
+typedef VectorRegisterImpl* VectorRegister;
+
+// The implementation of vector registers for z/Architecture.
+
+inline VectorRegister as_VectorRegister(int encoding) {
+  return (VectorRegister)(long)encoding;
+}
+
+class VectorRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 32,
+    number_of_arg_registers = 0
+  };
+
+  // construction
+  inline friend VectorRegister as_VectorRegister(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // accessors
+  int encoding() const                                {
+     assert(is_valid(), "invalid register"); return value();
+  }
+
+  bool is_valid() const           { return  0 <= value() && value() < number_of_registers; }
+  bool is_volatile() const        { return true; }
+  bool is_nonvolatile() const     { return false; }
+
+  // Register fields in z/Architecture instructions are 4 bits wide, restricting the
+  // addressable register set size to 16.
+  // The vector register set size is 32, requiring an extension, by one bit, of the
+  // register encoding. This is accomplished by the introduction of an RXB field in the
+  // instruction. RXB = Register eXtension Bits.
+  // The RXB field contains the MSBs (most significant bits) of the vector register numbers
+  // used by this instruction. Assignment of an MSB in RXB is by bit position of the
+  // register field in the instruction.
+  // Example:
+  //   The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
+  int64_t RXB_mask(int pos) {
+    if (encoding() >= number_of_registers/2) {
+      switch (pos) {
+        case 8:   return ((int64_t)0b1000) << 8; // actual bit pos: 36
+        case 12:  return ((int64_t)0b0100) << 8; // actual bit pos: 37
+        case 16:  return ((int64_t)0b0010) << 8; // actual bit pos: 38
+        case 32:  return ((int64_t)0b0001) << 8; // actual bit pos: 39
+        default:
+          ShouldNotReachHere();
+      }
+    }
+    return 0;
+  }
+
+  const char* name() const;
+
+  VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
+};
+
+// The Vector registers of z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V0,  (0));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V1,  (1));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V2,  (2));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V3,  (3));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V4,  (4));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V5,  (5));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V6,  (6));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V7,  (7));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V8,  (8));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V9,  (9));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
+#define Z_V0  ((VectorRegister)(   Z_V0_VectorRegisterEnumValue))
+#define Z_V1  ((VectorRegister)(   Z_V1_VectorRegisterEnumValue))
+#define Z_V2  ((VectorRegister)(   Z_V2_VectorRegisterEnumValue))
+#define Z_V3  ((VectorRegister)(   Z_V3_VectorRegisterEnumValue))
+#define Z_V4  ((VectorRegister)(   Z_V4_VectorRegisterEnumValue))
+#define Z_V5  ((VectorRegister)(   Z_V5_VectorRegisterEnumValue))
+#define Z_V6  ((VectorRegister)(   Z_V6_VectorRegisterEnumValue))
+#define Z_V7  ((VectorRegister)(   Z_V7_VectorRegisterEnumValue))
+#define Z_V8  ((VectorRegister)(   Z_V8_VectorRegisterEnumValue))
+#define Z_V9  ((VectorRegister)(   Z_V9_VectorRegisterEnumValue))
+#define Z_V10 ((VectorRegister)(  Z_V10_VectorRegisterEnumValue))
+#define Z_V11 ((VectorRegister)(  Z_V11_VectorRegisterEnumValue))
+#define Z_V12 ((VectorRegister)(  Z_V12_VectorRegisterEnumValue))
+#define Z_V13 ((VectorRegister)(  Z_V13_VectorRegisterEnumValue))
+#define Z_V14 ((VectorRegister)(  Z_V14_VectorRegisterEnumValue))
+#define Z_V15 ((VectorRegister)(  Z_V15_VectorRegisterEnumValue))
+#define Z_V16 ((VectorRegister)(  Z_V16_VectorRegisterEnumValue))
+#define Z_V17 ((VectorRegister)(  Z_V17_VectorRegisterEnumValue))
+#define Z_V18 ((VectorRegister)(  Z_V18_VectorRegisterEnumValue))
+#define Z_V19 ((VectorRegister)(  Z_V19_VectorRegisterEnumValue))
+#define Z_V20 ((VectorRegister)(  Z_V20_VectorRegisterEnumValue))
+#define Z_V21 ((VectorRegister)(  Z_V21_VectorRegisterEnumValue))
+#define Z_V22 ((VectorRegister)(  Z_V22_VectorRegisterEnumValue))
+#define Z_V23 ((VectorRegister)(  Z_V23_VectorRegisterEnumValue))
+#define Z_V24 ((VectorRegister)(  Z_V24_VectorRegisterEnumValue))
+#define Z_V25 ((VectorRegister)(  Z_V25_VectorRegisterEnumValue))
+#define Z_V26 ((VectorRegister)(  Z_V26_VectorRegisterEnumValue))
+#define Z_V27 ((VectorRegister)(  Z_V27_VectorRegisterEnumValue))
+#define Z_V28 ((VectorRegister)(  Z_V28_VectorRegisterEnumValue))
+#define Z_V29 ((VectorRegister)(  Z_V29_VectorRegisterEnumValue))
+#define Z_V30 ((VectorRegister)(  Z_V30_VectorRegisterEnumValue))
+#define Z_V31 ((VectorRegister)(  Z_V31_VectorRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers =
+      (RegisterImpl::number_of_registers +
+      FloatRegisterImpl::number_of_registers)
+      * 2 // register halves
+      + 1 // condition code register
+  };
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+
 // Common register declarations used in assembler code.
 REGISTER_DECLARATION(Register,      Z_EXC_OOP, Z_R2);
 REGISTER_DECLARATION(Register,      Z_EXC_PC,  Z_R3);
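The RXB comment in the VectorRegisterImpl section above describes how vector register numbers 16..31 are encoded: the low four bits go into the ordinary 4-bit register field, and the fifth (most significant) bit is placed in the RXB nibble at instruction bits 36..39, selected by the field's bit position. A minimal standalone sketch of that mapping follows; rxb_bit() is my own name for illustration, not a HotSpot function.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // One RXB bit per field position: registers below 16 need no extension bit,
    // registers 16..31 set the RXB bit that corresponds to their field.
    static uint64_t rxb_bit(int reg, int field_pos) {
      if (reg < 16) return 0;
      switch (field_pos) {
        case 8:  return 0b1000ULL << 8;    // instruction bit 36
        case 12: return 0b0100ULL << 8;    // instruction bit 37
        case 16: return 0b0010ULL << 8;    // instruction bit 38
        case 32: return 0b0001ULL << 8;    // instruction bit 39
        default: assert(false && "unexpected field position"); return 0;
      }
    }

    int main() {
      // V26 in the field at bit position 12: the register field holds 26 & 0xF = 10,
      // and RXB bit 37 is set (the 0b0100 << 8 case), matching RXB_mask() above.
      printf("0x%03llx\n", (unsigned long long)rxb_bit(26, 12));
      return 0;
    }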
--- a/src/hotspot/cpu/s390/s390.ad	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/s390.ad	Tue Oct 10 16:29:04 2017 +0200
@@ -3149,7 +3149,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregi and roddRegI constitute and even-odd-pair.
+// revenRegI and roddRegI constitute an even-odd pair.
 operand revenRegI() %{
   constraint(ALLOC_IN_RC(z_rarg3_int_reg));
   match(iRegI);
@@ -3157,7 +3157,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregi and roddRegI constitute and even-odd-pair.
+// revenRegI and roddRegI constitute an even-odd pair.
 operand roddRegI() %{
   constraint(ALLOC_IN_RC(z_rarg4_int_reg));
   match(iRegI);
@@ -3283,7 +3283,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregp and roddRegP constitute and even-odd-pair.
+// revenRegP and roddRegP constitute an even-odd pair.
 operand revenRegP() %{
   constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
   match(iRegP);
@@ -3291,7 +3291,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegP and roddRegP constitute an even-odd pair.
 operand roddRegP() %{
   constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
   match(iRegP);
@@ -3380,7 +3380,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegL and roddRegL constitute an even-odd pair.
 operand revenRegL() %{
   constraint(ALLOC_IN_RC(z_rarg3_long_reg));
   match(iRegL);
@@ -3388,7 +3388,7 @@
   interface(REG_INTER);
 %}
 
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegL and roddRegL constitute an even-odd pair.
 operand roddRegL() %{
   constraint(ALLOC_IN_RC(z_rarg4_long_reg));
   match(iRegL);
@@ -6443,6 +6443,32 @@
   ins_pipe(pipe_class_dummy);
 %}
 
+instruct mulHiL_reg_reg(revenRegL Rdst, roddRegL Rsrc1, iRegL Rsrc2, iRegL Rtmp1, flagsReg cr)%{
+  match(Set Rdst (MulHiL Rsrc1 Rsrc2));
+  effect(TEMP_DEF Rdst, USE_KILL Rsrc1, TEMP Rtmp1, KILL cr);
+  ins_cost(7*DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MulHiL  $Rdst, $Rsrc1, $Rsrc2\t # Multiply High Long" %}
+  ins_encode%{
+    Register dst  = $Rdst$$Register;
+    Register src1 = $Rsrc1$$Register;
+    Register src2 = $Rsrc2$$Register;
+    Register tmp1 = $Rtmp1$$Register;
+    Register tmp2 = $Rdst$$Register;
+    // z/Architecture has only unsigned multiply (64 * 64 -> 128).
+    // Implementing mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63))
+    __ z_srag(tmp2, src1, 63);  // a>>63
+    __ z_srag(tmp1, src2, 63);  // b>>63
+    __ z_ngr(tmp2, src2);       // b & (a>>63)
+    __ z_ngr(tmp1, src1);       // a & (b>>63)
+    __ z_agr(tmp1, tmp2);       // ((a & (b>>63)) + (b & (a>>63)))
+    __ z_mlgr(dst, src2);       // tricky: 128-bit product is written to even/odd pair (dst,src1),
+                                //         multiplicand is taken from oddReg (src1), multiplier in src2.
+    __ z_sgr(dst, tmp1);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
 //  DIV
 
 // Integer DIVMOD with Register, both quotient and mod results
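The mulHiL rule above relies on the identity mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63)) to recover the signed high word from z/Architecture's unsigned 64x64->128 multiply. A small host-side check of that identity, assuming a GCC/Clang-style compiler with __int128 support (plain C++, not s390 assembly):

    #include <cstdint>
    #include <cstdio>

    // mulhs via mulhu plus the correction term: subtract b when a is negative
    // and a when b is negative (arithmetic shift by 63 gives the sign mask).
    static int64_t mulhs_via_mulhu(int64_t a, int64_t b) {
      uint64_t hi_u = (uint64_t)(((unsigned __int128)(uint64_t)a * (uint64_t)b) >> 64);
      uint64_t corr = ((uint64_t)a & (uint64_t)(b >> 63)) + ((uint64_t)b & (uint64_t)(a >> 63));
      return (int64_t)(hi_u - corr);
    }

    int main() {
      int64_t a = -123456789012345LL, b = 987654321098765LL;
      int64_t ref = (int64_t)(((__int128)a * b) >> 64);   // signed reference result
      printf("%lld %lld\n", (long long)mulhs_via_mulhu(a, b), (long long)ref);
      return 0;
    }

Both printed values agree; the correction term is exactly what the z_srag/z_ngr/z_agr/z_sgr sequence accumulates in tmp1 before subtracting it from the high word produced by z_mlgr.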
--- a/src/hotspot/cpu/s390/templateTable_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/templateTable_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2382,6 +2382,7 @@
   if (is_static) {
     __ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
     __ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
+    __ resolve_oop_handle(obj);
   }
 }
 
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -706,12 +706,13 @@
   Label    getCPUFEATURES;                   // fcode = -1 (cache)
   Label    getCIPHERFEATURES;                // fcode = -2 (cipher)
   Label    getMSGDIGESTFEATURES;             // fcode = -3 (SHA)
+  Label    getVECTORFEATURES;                // fcode = -4 (OS support for vector instructions)
   Label    checkLongDispFast;
   Label    noLongDisp;
   Label    posDisp, negDisp;
   Label    errRTN;
   a->z_ltgfr(Z_R0, Z_ARG2);                  // Buf len to r0 and test.
-  a->z_brl(getFEATURES);                     // negative -> Get machine features.
+  a->z_brl(getFEATURES);                     // negative -> Get machine features not covered by facility list.
   a->z_brz(checkLongDispFast);               // zero -> Check for high-speed Long Displacement Facility.
   a->z_aghi(Z_R0, -1);
   a->z_stfle(0, Z_ARG1);
@@ -736,6 +737,8 @@
   a->z_bre(getCIPHERFEATURES);
   a->z_cghi(Z_R0, -3);                       // -3: Extract detailed crypto capabilities (msg digest instructions).
   a->z_bre(getMSGDIGESTFEATURES);
+  a->z_cghi(Z_R0, -4);                       // -4: Verify vector instruction availability (OS support).
+  a->z_bre(getVECTORFEATURES);
 
   a->z_xgr(Z_RET, Z_RET);                    // Not a valid function code.
   a->z_br(Z_R14);                            // Return "operation aborted".
@@ -766,6 +769,11 @@
   a->z_ecag(Z_RET,Z_R0,0,Z_ARG3);            // Extract information as requested by Z_ARG1 contents.
   a->z_br(Z_R14);
 
+  // Use a vector instruction to verify OS support. Will fail with SIGFPE if OS support is missing.
+  a->bind(getVECTORFEATURES);
+  a->z_vtm(Z_V0,Z_V0);                       // non-destructive vector instruction. Will cause SIGFPE if not supported.
+  a->z_br(Z_R14);
+
   // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
   a->bind(checkLongDispFast);
   a->z_llill(Z_R0, 0xffff);                  // preset #iterations
@@ -962,6 +970,19 @@
     _nfeatures = 0;
   }
 
+  if (has_VectorFacility()) {
+    // Verify that feature can actually be used. OS support required.
+    call_getFeatures(buffer, -4, 0);
+    if (printVerbose) {
+      ttyLocker ttyl;
+      if (has_VectorFacility()) {
+        tty->print_cr("  Vector Facility has been verified to be supported by OS");
+      } else {
+        tty->print_cr("  Vector Facility has been disabled - not supported by OS");
+      }
+    }
+  }
+
   // Extract Crypto Facility details.
   if (has_Crypto()) {
     // Get cipher features.
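The fcode -4 path above verifies OS support by actually executing VTM and letting the resulting SIGFPE (raised when the OS has not enabled the vector facility) reach the VM's signal handling, which can then clear the feature via the reset_has_VectorFacility() added below. The generic shape of such a probe is sketched here with plain POSIX signal handling rather than HotSpot's machinery; try_vector_probe and dummy_probe are illustrative stand-ins for the generated stub invoked through call_getFeatures(buffer, -4, 0).

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>

    static sigjmp_buf probe_env;

    static void probe_handler(int) {
      siglongjmp(probe_env, 1);   // unwind out of the faulting probe
    }

    // Run 'probe' with SIGFPE/SIGILL temporarily redirected; report whether it
    // completed without faulting.
    static bool try_vector_probe(void (*probe)()) {
      struct sigaction sa = {}, old_fpe, old_ill;
      sa.sa_handler = probe_handler;
      sigaction(SIGFPE, &sa, &old_fpe);
      sigaction(SIGILL, &sa, &old_ill);
      bool supported = false;
      if (sigsetjmp(probe_env, 1) == 0) {
        probe();                  // would execute VTM on s390; a stand-in here
        supported = true;
      }
      sigaction(SIGFPE, &old_fpe, nullptr);
      sigaction(SIGILL, &old_ill, nullptr);
      return supported;
    }

    static void dummy_probe() { /* placeholder for the generated VTM stub */ }

    int main() {
      printf("vector facility usable: %s\n", try_vector_probe(dummy_probe) ? "yes" : "no");
      return 0;
    }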
--- a/src/hotspot/cpu/s390/vm_version_s390.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/s390/vm_version_s390.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -473,6 +473,8 @@
   static void set_has_CryptoExt5()                { _features[0] |= CryptoExtension5Mask; }
   static void set_has_VectorFacility()            { _features[2] |= VectorFacilityMask; }
 
+  static void reset_has_VectorFacility()          { _features[2] &= ~VectorFacilityMask; }
+
   // Assembler testing.
   static void allow_all();
   static void revert();
--- a/src/hotspot/cpu/sparc/assembler_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/assembler_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -122,6 +122,7 @@
     fpop1_op3    = 0x34,
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
+    addx_op3     = 0x36,
     aes3_op3     = 0x36,
     sha_op3      = 0x36,
     bmask_op3    = 0x36,
@@ -133,6 +134,8 @@
     fzero_op3    = 0x36,
     fsrc_op3     = 0x36,
     fnot_op3     = 0x36,
+    mpmul_op3    = 0x36,
+    umulx_op3    = 0x36,
     xmulx_op3    = 0x36,
     crc32c_op3   = 0x36,
     impdep2_op3  = 0x37,
@@ -195,6 +198,9 @@
     fnegs_opf          = 0x05,
     fnegd_opf          = 0x06,
 
+    addxc_opf          = 0x11,
+    addxccc_opf        = 0x13,
+    umulxhi_opf        = 0x16,
     alignaddr_opf      = 0x18,
     bmask_opf          = 0x19,
 
@@ -240,7 +246,8 @@
     sha256_opf         = 0x142,
     sha512_opf         = 0x143,
 
-    crc32c_opf         = 0x147
+    crc32c_opf         = 0x147,
+    mpmul_opf          = 0x148
   };
 
   enum op5s {
@@ -380,7 +387,7 @@
     assert_signed_range(x, nbits + 2);
   }
 
-  static void assert_unsigned_const(int x, int nbits) {
+  static void assert_unsigned_range(int x, int nbits) {
     assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
   }
 
@@ -534,6 +541,12 @@
     return x & ((1 << nbits) - 1);
   }
 
+  // unsigned immediate, in low bits, at most nbits long.
+  static int uimm(int x, int nbits) {
+    assert_unsigned_range(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
   // compute inverse of wdisp16
   static intptr_t inv_wdisp16(int x, intptr_t pos) {
     int lo = x & ((1 << 14) - 1);
@@ -631,6 +644,9 @@
   // FMAf instructions supported only on certain processors
   static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
 
+  // MPMUL instruction supported only on certain processors
+  static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
+
   // instruction only in VIS1
   static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
 
@@ -772,11 +788,12 @@
     AbstractAssembler::flush();
   }
 
-  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
-  inline void emit_data(int);
-  inline void emit_data(int, RelocationHolder const &rspec);
-  inline void emit_data(int, relocInfo::relocType rtype);
-  // helper for above functions
+  inline void emit_int32(int32_t);  // shadows AbstractAssembler::emit_int32
+  inline void emit_data(int32_t);
+  inline void emit_data(int32_t, RelocationHolder const&);
+  inline void emit_data(int32_t, relocInfo::relocType rtype);
+
+  // Helper for the above functions.
   inline void check_delay();
 
 
@@ -929,6 +946,10 @@
   // fmaf instructions.
 
   inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+  inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+
+  inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+  inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
 
   // pp 165
 
@@ -960,6 +981,8 @@
   inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
                   RelocationHolder const &rspec = RelocationHolder());
 
+  inline void ldd(Register s1, Register s2, FloatRegister d);
+  inline void ldd(Register s1, int simm13a, FloatRegister d);
 
   inline void ldfsr(Register s1, Register s2);
   inline void ldfsr(Register s1, int simm13a);
@@ -987,8 +1010,6 @@
   inline void lduw(Register s1, int simm13a, Register d);
   inline void ldx(Register s1, Register s2, Register d);
   inline void ldx(Register s1, int simm13a, Register d);
-  inline void ldd(Register s1, Register s2, Register d);
-  inline void ldd(Register s1, int simm13a, Register d);
 
   // pp 177
 
@@ -1157,6 +1178,9 @@
   inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
   inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
 
+  inline void std(FloatRegister d, Register s1, Register s2);
+  inline void std(FloatRegister d, Register s1, int simm13a);
+
   inline void stfsr(Register s1, Register s2);
   inline void stfsr(Register s1, int simm13a);
   inline void stxfsr(Register s1, Register s2);
@@ -1177,8 +1201,6 @@
   inline void stw(Register d, Register s1, int simm13a);
   inline void stx(Register d, Register s1, Register s2);
   inline void stx(Register d, Register s1, int simm13a);
-  inline void std(Register d, Register s1, Register s2);
-  inline void std(Register d, Register s1, int simm13a);
 
   // pp 177
 
@@ -1267,6 +1289,9 @@
 
   // VIS3 instructions
 
+  inline void addxc(Register s1, Register s2, Register d);
+  inline void addxccc(Register s1, Register s2, Register d);
+
   inline void movstosw(FloatRegister s, Register d);
   inline void movstouw(FloatRegister s, Register d);
   inline void movdtox(FloatRegister s, Register d);
@@ -1276,6 +1301,7 @@
 
   inline void xmulx(Register s1, Register s2, Register d);
   inline void xmulxhi(Register s1, Register s2, Register d);
+  inline void umulxhi(Register s1, Register s2, Register d);
 
   // Crypto SHA instructions
 
@@ -1287,6 +1313,10 @@
 
   inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);
 
+  // MPMUL instruction
+
+  inline void mpmul(int uimm5);
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef VALIDATE_PIPELINE
--- a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -59,7 +59,7 @@
 #endif
 }
 
-inline void Assembler::emit_int32(int x) {
+inline void Assembler::emit_int32(int32_t x) {
   check_delay();
 #ifdef VALIDATE_PIPELINE
   _hazard_state = NoHazard;
@@ -67,16 +67,16 @@
   AbstractAssembler::emit_int32(x);
 }
 
-inline void Assembler::emit_data(int x) {
+inline void Assembler::emit_data(int32_t x) {
   emit_int32(x);
 }
 
-inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
+inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
   relocate(rtype);
   emit_int32(x);
 }
 
-inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
+inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
   relocate(rspec);
   emit_int32(x);
 }
@@ -359,6 +359,19 @@
   fmaf_only();
   emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
 }
+inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w));
+}
+
+inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w));
+}
+inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w));
+}
 
 inline void Assembler::flush(Register s1, Register s2) {
   emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@@ -402,6 +415,15 @@
   emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
 }
 
+inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
+  assert(d->is_even(), "not even");
+  ldf(FloatRegisterImpl::D, s1, s2, d);
+}
+inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
+  assert(d->is_even(), "not even");
+  ldf(FloatRegisterImpl::D, s1, simm13a, d);
+}
+
 inline void Assembler::ldxfsr(Register s1, Register s2) {
   emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
 }
@@ -460,16 +482,6 @@
 inline void Assembler::ldx(Register s1, int simm13a, Register d) {
   emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }
-inline void Assembler::ldd(Register s1, Register s2, Register d) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::ldd(Register s1, int simm13a, Register d) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}
 
 inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
   emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@@ -806,6 +818,15 @@
   emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }
 
+inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, s2);
+}
+inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, simm13a);
+}
+
 inline void Assembler::stxfsr(Register s1, Register s2) {
   emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
 }
@@ -848,16 +869,6 @@
 inline void Assembler::stx(Register d, Register s1, int simm13a) {
   emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }
-inline void Assembler::std(Register d, Register s1, Register s2) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::std(Register d, Register s1, int simm13a) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}
 
 inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
   emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@@ -1043,6 +1054,15 @@
 
 // VIS3 instructions
 
+inline void Assembler::addxc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
+}
+inline void Assembler::addxccc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
+}
+
 inline void Assembler::movstosw(FloatRegister s, Register d) {
   vis3_only();
   emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@@ -1073,6 +1093,10 @@
   vis3_only();
   emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
 }
+inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
+}
 
 // Crypto SHA instructions
 
@@ -1096,4 +1120,11 @@
   emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
 }
 
+// MPMUL instruction
+
+inline void Assembler::mpmul(int uimm5) {
+  mpmul_only();
+  emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5));
+}
+
 #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
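The new uimm(x, nbits) helper used by mpmul above mirrors the existing simm helper: assert that the value fits in nbits unsigned bits (via assert_unsigned_range) and keep only those low bits. A trimmed standalone sketch of the same check-and-mask behaviour; uimm_field is my own name, not the Assembler method.

    #include <cassert>
    #include <cstdio>

    // Verify an unsigned immediate fits in 'nbits' and mask it into the low bits.
    static int uimm_field(int x, int nbits) {
      assert((unsigned)x < (1u << nbits) && "unsigned constant out of range");
      return x & ((1 << nbits) - 1);
    }

    int main() {
      // mpmul takes a 5-bit unsigned immediate (0..31).
      printf("%d\n", uimm_field(17, 5));
      return 0;
    }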
--- a/src/hotspot/cpu/sparc/frame_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/frame_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -119,8 +119,8 @@
     reg = regname->as_Register();
   }
   if (reg->is_out()) {
-    assert(_younger_window != NULL, "Younger window should be available");
-    return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
+    return _younger_window == NULL ? NULL :
+      second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
   }
   if (reg->is_local() || reg->is_in()) {
     assert(_window != NULL, "Window should be available");
--- a/src/hotspot/cpu/sparc/globals_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/globals_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -97,12 +97,15 @@
                    writeable) \
                                                                             \
   product(intx, UseVIS, 99,                                                 \
-          "Highest supported VIS instructions set on Sparc")                \
+          "Highest supported VIS instructions set on SPARC")                \
           range(0, 99)                                                      \
                                                                             \
   product(bool, UseCBCond, false,                                           \
           "Use compare and branch instruction on SPARC")                    \
                                                                             \
+  product(bool, UseMPMUL, false,                                            \
+          "Use multi-precision multiply instruction (mpmul) on SPARC")      \
+                                                                            \
   product(bool, UseBlockZeroing, false,                                     \
           "Use special cpu instructions for block zeroing")                 \
                                                                             \
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1574,29 +1574,39 @@
   assert_not_delayed();
   if (use_cbcond(L)) {
     Assembler::cbcond(zero, ptr_cc, s1, 0, L);
-    return;
+  } else {
+    br_null(s1, false, p, L);
+    delayed()->nop();
   }
-  br_null(s1, false, p, L);
-  delayed()->nop();
 }
 
 void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
   assert_not_delayed();
   if (use_cbcond(L)) {
     Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
-    return;
+  } else {
+    br_notnull(s1, false, p, L);
+    delayed()->nop();
   }
-  br_notnull(s1, false, p, L);
-  delayed()->nop();
 }
 
 // Unconditional short branch
 void MacroAssembler::ba_short(Label& L) {
+  assert_not_delayed();
   if (use_cbcond(L)) {
     Assembler::cbcond(equal, icc, G0, G0, L);
-    return;
+  } else {
+    br(always, false, pt, L);
+    delayed()->nop();
   }
-  br(always, false, pt, L);
+}
+
+// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
+
+void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
+  assert_not_delayed();
+  Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
+  br(cf, false, p, L);
   delayed()->nop();
 }
 
@@ -3834,6 +3844,7 @@
   ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
   ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
   ld_ptr(mirror, mirror_offset, mirror);
+  resolve_oop_handle(mirror);
 }
 
 void MacroAssembler::load_klass(Register src_oop, Register klass) {
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -606,7 +606,7 @@
   // offset.  No explicit code generation is needed if the offset is within a certain
   // range (0 <= offset <= page_size).
   //
-  // %%%%%% Currently not done for SPARC
+  // FIXME: Currently not done for SPARC
 
   void null_check(Register reg, int offset = -1);
   static bool needs_explicit_null_check(intptr_t offset);
@@ -648,6 +648,9 @@
   // unconditional short branch
   void ba_short(Label& L);
 
+  // Branch on icc.z (true or not).
+  void br_icc_zero(bool iszero, Predict p, Label &L);
+
   inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
 
@@ -663,19 +666,19 @@
   inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
 
   // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
-  inline void cmp(  Register s1, Register s2 );
-  inline void cmp(  Register s1, int simm13a );
+  inline void cmp( Register s1, Register s2 );
+  inline void cmp( Register s1, int simm13a );
 
   inline void jmp( Register s1, Register s2 );
   inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
 
   // Check if the call target is out of wdisp30 range (relative to the code cache)
   static inline bool is_far_target(address d);
-  inline void call( address d,  relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void call( address d,  RelocationHolder const& rspec);
+  inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void call( address d, RelocationHolder const& rspec);
 
-  inline void call( Label& L,   relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void call( Label& L,  RelocationHolder const& rspec);
+  inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void call( Label& L, RelocationHolder const& rspec);
 
   inline void callr( Register s1, Register s2 );
   inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -185,7 +185,7 @@
 }
 
 inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
-  // See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
+  // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
   avoid_pipeline_stall();
   br(c, a, p, target(L));
 }
--- a/src/hotspot/cpu/sparc/register_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/register_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -236,7 +236,7 @@
   inline VMReg as_VMReg( );
 
   // accessors
-  int encoding() const                                { assert(is_valid(), "invalid register"); return value(); }
+  int encoding() const { assert(is_valid(), "invalid register"); return value(); }
 
  public:
   int encoding(Width w) const {
@@ -258,10 +258,12 @@
     return -1;
   }
 
-  bool  is_valid() const                              { return 0 <= value() && value() < number_of_registers; }
+  bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+  bool is_even()  const { return (encoding() & 1) == 0; }
+
   const char* name() const;
 
-  FloatRegister successor() const                     { return as_FloatRegister(encoding() + 1); }
+  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
 };
 
 
--- a/src/hotspot/cpu/sparc/sparc.ad	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/sparc.ad	Tue Oct 10 16:29:04 2017 +0200
@@ -2628,7 +2628,6 @@
 %}
 
 
-
 enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
     MacroAssembler _masm(&cbuf);
 
@@ -2651,7 +2650,71 @@
     __ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
 %}
 
-
+enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
 
 
 enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@@ -7597,7 +7660,7 @@
   ins_pipe(fdivD_reg_reg);
 %}
 
-// Single precision fused floating-point multiply-add (d = a * b + c).
+// Single/Double precision fused floating-point multiply-add (d = a * b + c).
 instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
   predicate(UseFMA);
   match(Set dst (FmaF c (Binary a b)));
@@ -7606,7 +7669,6 @@
   ins_pipe(fmaF_regx4);
 %}
 
-// Double precision fused floating-point multiply-add (d = a * b + c).
 instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
   predicate(UseFMA);
   match(Set dst (FmaD c (Binary a b)));
@@ -7615,6 +7677,66 @@
   ins_pipe(fmaD_regx4);
 %}
 
+// Additional patterns matching complement versions that we can map directly to
+// variants of the fused multiply-add instructions.
+
+// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
+instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF c) (Binary a b)));
+  format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+  ins_encode(fmsubs(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD c) (Binary a b)));
+  format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+  ins_encode(fmsubd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-add,
+//      d = -1 * a * b - c = -(a * b + c)
+instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
+  match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
+  format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+  ins_encode(fnmadds(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
+  match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
+  format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+  ins_encode(fnmaddd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-sub,
+//      d = -1 * a * b + c = -(a * b - c)
+instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF c (Binary (NegF a) b)));
+  match(Set dst (FmaF c (Binary a (NegF b))));
+  format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+  ins_encode(fnmsubs(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD c (Binary (NegD a) b)));
+  match(Set dst (FmaD c (Binary a (NegD b))));
+  format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+  ins_encode(fnmsubd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
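+// For reference, the patterns above exploit the identities
+//
+//   fma( a, b, -c) ==   a * b - c     -> fmsub
+//   fma(-a, b, -c) == -(a * b + c)    -> fnmadd   (also fma(a, -b, -c))
+//   fma(-a, b,  c) == -(a * b - c)    -> fnmsub   (also fma(a, -b,  c))
+//
+// so that, for example, a Java expression such as Math.fma(-a, b, -c) can be
+// emitted as a single 'fnmaddd' when UseFMA is enabled (illustrative mapping;
+// the exact ideal-graph shapes matched are the FmaF/FmaD forms listed above).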
+
 //----------Logical Instructions-----------------------------------------------
 // And Instructions
 // Register And
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -58,7 +58,6 @@
 // Note:  The register L7 is used as L7_thread_cache, and may not be used
 //        any other way within this module.
 
-
 static const Register& Lstub_temp = L2;
 
 // -------------------------------------------------------------------------------------------------------------------------
@@ -4943,7 +4942,7 @@
     return start;
   }
 
-/**
+  /**
    *  Arguments:
    *
    * Inputs:
@@ -4975,6 +4974,773 @@
     return start;
   }
 
+  /**
+   * Arguments:
+   *
+   * Inputs:
+   *   I0   - int* x-addr
+   *   I1   - int  x-len
+   *   I2   - int* y-addr
+   *   I3   - int  y-len
+   *   I4   - int* z-addr   (output vector)
+   *   I5   - int  z-len
+   */
+  address generate_multiplyToLen() {
+    assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+    address start = __ pc();
+
+    __ save_frame(0);
+
+    const Register xptr = I0; // input address
+    const Register xlen = I1; // ...and length in 32b-words
+    const Register yptr = I2; //
+    const Register ylen = I3; //
+    const Register zptr = I4; // output address
+    const Register zlen = I5; // ...and length in 32b-words
+
+    /* The minimal "limb" representation suggest that odd length vectors are as
+     * likely as even length dittos. This in turn suggests that we need to cope
+     * with odd/even length arrays and data not aligned properly for 64-bit read
+     * and write operations. We thus use a number of different kernels:
+     *
+     *   if (is_even(x.len) && is_even(y.len))
+     *      if (is_align64(x) && is_align64(y) && is_align64(z))
+     *         if (x.len == y.len && 16 <= x.len && x.len <= 64)
+     *            memv_mult_mpmul(...)
+     *         else
+     *            memv_mult_64x64(...)
+     *      else
+     *         memv_mult_64x64u(...)
+     *   else
+     *      memv_mult_32x32(...)
+     *
+     * Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
+     * In case CBCOND instructions are supported, we will use 'cxbX'. If the
+     * MPMUL instruction is supported, we will generate a kernel using 'mpmul'
+     * (for vectors with proper characteristics).
+     */
+    const Register tmp0 = L0;
+    const Register tmp1 = L1;
+
+    Label L_mult_32x32;
+    Label L_mult_64x64u;
+    Label L_mult_64x64;
+    Label L_exit;
+
+    if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
+    if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);
+
+    if (UseMPMUL) {
+      if_eq(xlen, ylen, false, L_mult_64x64);
+      if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);
+
+      // 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
+      //    operating on equal length vectors of size [16..64].
+      gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
+    }
+
+    // 2. Multiply naturally aligned 64-bit datums (64x64).
+    __ bind(L_mult_64x64);
+    gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    // 3. Multiply unaligned 64-bit datums (64x64).
+    __ bind(L_mult_64x64u);
+    gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    // 4. Multiply naturally aligned 32-bit datums (32x32).
+    __ bind(L_mult_32x32);
+    gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    __ bind(L_exit);
+    __ ret();
+    __ delayed()->restore();
+
+    return start;
+  }
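+  /* For reference: all four kernels compute the same schoolbook product that
+   * java.math.BigInteger.multiplyToLen() computes. A rough sketch of the
+   * 64-bit limb variant, with x/y/z as arrays of 64-bit words (most
+   * significant word first) and 'uint128_t' standing in for the full 128-bit
+   * product (illustrative only; the code above keeps everything in registers):
+   *
+   *   uint64_t c = 0;
+   *   for (int i = xn - 1; i >= 0; i--) {      // z = x * y[yn-1]
+   *     uint128_t p = (uint128_t)x[i] * y[yn - 1] + c;
+   *     z[i + yn] = (uint64_t)p;  c = (uint64_t)(p >> 64);
+   *   }
+   *   z[yn - 1] = c;
+   *   for (int j = yn - 2; j >= 0; j--) {      // z += x * y[j], shifted
+   *     c = 0;
+   *     for (int i = xn - 1; i >= 0; i--) {
+   *       uint128_t p = (uint128_t)x[i] * y[j] + z[i + j + 1] + c;
+   *       z[i + j + 1] = (uint64_t)p;  c = (uint64_t)(p >> 64);
+   *     }
+   *     z[j] = c;
+   *   }
+   */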
+
+  // Additional help functions used by multiplyToLen generation.
+
+  void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
+  {
+    __ or3(r1, r2, tmp);
+    __ andcc(tmp, 0x1, tmp);
+    __ br_icc_zero(iseven, Assembler::pn, L);
+  }
+
+  void if_all3_aligned(Register r1, Register r2, Register r3,
+                       Register tmp, uint align, bool isalign, Label &L)
+  {
+    __ or3(r1, r2, tmp);
+    __ or3(r3, tmp, tmp);
+    __ andcc(tmp, (align - 1), tmp);
+    __ br_icc_zero(isalign, Assembler::pn, L);
+  }
+
+  void if_eq(Register x, Register y, bool iseq, Label &L)
+  {
+    Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual);
+    __ cmp_and_br_short(x, y, cf, Assembler::pt, L);
+  }
+
+  void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
+  {
+    assert(Assembler::is_simm13(lb), "Small ints only!");
+    assert(Assembler::is_simm13(ub), "Small ints only!");
+    // Compute (x - lb) * (ub - x) >= 0
+    // NOTE: With the local use of this routine, we rely on small integers to
+    //       guarantee that we do not overflow in the multiplication.
+    __ add(G0, ub, t2);
+    __ sub(x, lb, t1);
+    __ sub(t2, x, t2);
+    __ mulx(t1, t2, t1);
+    Assembler::Condition cf = (inrng ? Assembler::greaterEqual : Assembler::less);
+    __ cmp_and_br_short(t1, G0, cf, Assembler::pt, L);
+  }
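+  // Example: with lb = 16 and ub = 64 (as used above), x = 40 gives
+  // (40 - 16) * (64 - 40) = 24 * 24 = 576 >= 0 (in range), while x = 80 gives
+  // (80 - 16) * (64 - 80) = 64 * -16 = -1024 < 0 (out of range), so a single
+  // signed comparison against zero replaces the two-sided bounds check.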
+
+  void ldd_entry(Register base, Register offs, FloatRegister dest)
+  {
+    __ ldd(base, offs, dest);
+    __ inc(offs, 8);
+  }
+
+  void ldx_entry(Register base, Register offs, Register dest)
+  {
+    __ ldx(base, offs, dest);
+    __ inc(offs, 8);
+  }
+
+  void mpmul_entry(int m, Label &next)
+  {
+    __ mpmul(m);
+    __ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
+  }
+
+  void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
+  {
+    __ bind(L);
+    __ stx(r1, base, offs);
+    __ inc(offs, 8);
+    __ stx(r2, base, offs);
+    __ inc(offs, 8);
+  }
+
+  void offs_entry(Label &Lbl0, Label &Lbl1)
+  {
+    assert(Lbl0.is_bound(), "must be");
+    assert(Lbl1.is_bound(), "must be");
+
+    int offset = Lbl0.loc_pos() - Lbl1.loc_pos();
+
+    __ emit_data(offset);
+  }
+
+  /* Generate the actual multiplication kernels for BigInteger vectors:
+   *
+   *   1. gen_mult_mpmul(...)
+   *
+   *   2. gen_mult_64x64(...)
+   *
+   *   3. gen_mult_64x64_unaligned(...)
+   *
+   *   4. gen_mult_32x32(...)
+   */
+  void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
+                      Label &L_exit)
+  {
+    const Register zero = G0;
+    const Register gxp  = G1;   // Need to use global registers across RWs.
+    const Register gyp  = G2;
+    const Register gzp  = G3;
+    const Register offs = G4;
+    const Register disp = G5;
+
+    __ mov(xptr, gxp);
+    __ mov(yptr, gyp);
+    __ mov(zptr, gzp);
+
+    /* Compute jump vector entry:
+     *
+     *   1. mpmul input size (0..31) x 64b
+     *   2. vector input size in 32b limbs (even number)
+     *   3. branch entries in reverse order (31..0), using two
+     *      instructions per entry (2 * 4 bytes).
+     *
+     *   displacement = byte_offset(bra_offset(len))
+     *                = byte_offset((64 - len)/2)
+     *                = 8 * (64 - len)/2
+     *                = 4 * (64 - len)
+     */
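+    /* For example, a maximal vector (len = 64 32-bit limbs, i.e. 32 64-bit
+     * words) gives disp = 4 * (64 - 64) = 0 and dispatches to the first load
+     * entry below, picking up all 32 double-words; the minimal accepted vector
+     * (len = 16) gives disp = 4 * (64 - 16) = 192, skipping the first 24
+     * entries (24 * 8 bytes) so that only 8 double-words are loaded.
+     */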
+    Register temp = I5;         // Alright to use input regs. in first batch.
+
+    __ sub(zero, len, temp);
+    __ add(temp, 64, temp);
+    __ sllx(temp, 2, disp);     // disp := (64 - len) << 2
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(temp);
+    __ add(temp, 16, temp);
+    __ jmp(temp, disp);
+    __ delayed()->clr(offs);
+
+    ldd_entry(gxp, offs, F22);
+    ldd_entry(gxp, offs, F20);
+    ldd_entry(gxp, offs, F18);
+    ldd_entry(gxp, offs, F16);
+    ldd_entry(gxp, offs, F14);
+    ldd_entry(gxp, offs, F12);
+    ldd_entry(gxp, offs, F10);
+    ldd_entry(gxp, offs, F8);
+    ldd_entry(gxp, offs, F6);
+    ldd_entry(gxp, offs, F4);
+    ldx_entry(gxp, offs, I5);
+    ldx_entry(gxp, offs, I4);
+    ldx_entry(gxp, offs, I3);
+    ldx_entry(gxp, offs, I2);
+    ldx_entry(gxp, offs, I1);
+    ldx_entry(gxp, offs, I0);
+    ldx_entry(gxp, offs, L7);
+    ldx_entry(gxp, offs, L6);
+    ldx_entry(gxp, offs, L5);
+    ldx_entry(gxp, offs, L4);
+    ldx_entry(gxp, offs, L3);
+    ldx_entry(gxp, offs, L2);
+    ldx_entry(gxp, offs, L1);
+    ldx_entry(gxp, offs, L0);
+    ldd_entry(gxp, offs, F2);
+    ldd_entry(gxp, offs, F0);
+    ldx_entry(gxp, offs, O5);
+    ldx_entry(gxp, offs, O4);
+    ldx_entry(gxp, offs, O3);
+    ldx_entry(gxp, offs, O2);
+    ldx_entry(gxp, offs, O1);
+    ldx_entry(gxp, offs, O0);
+
+    __ save(SP, -176, SP);
+
+    const Register addr = gxp;  // Alright to reuse 'gxp'.
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(addr);
+    __ add(addr, 16, addr);
+    __ jmp(addr, disp);
+    __ delayed()->clr(offs);
+
+    ldd_entry(gyp, offs, F58);
+    ldd_entry(gyp, offs, F56);
+    ldd_entry(gyp, offs, F54);
+    ldd_entry(gyp, offs, F52);
+    ldd_entry(gyp, offs, F50);
+    ldd_entry(gyp, offs, F48);
+    ldd_entry(gyp, offs, F46);
+    ldd_entry(gyp, offs, F44);
+    ldd_entry(gyp, offs, F42);
+    ldd_entry(gyp, offs, F40);
+    ldd_entry(gyp, offs, F38);
+    ldd_entry(gyp, offs, F36);
+    ldd_entry(gyp, offs, F34);
+    ldd_entry(gyp, offs, F32);
+    ldd_entry(gyp, offs, F30);
+    ldd_entry(gyp, offs, F28);
+    ldd_entry(gyp, offs, F26);
+    ldd_entry(gyp, offs, F24);
+    ldx_entry(gyp, offs, O5);
+    ldx_entry(gyp, offs, O4);
+    ldx_entry(gyp, offs, O3);
+    ldx_entry(gyp, offs, O2);
+    ldx_entry(gyp, offs, O1);
+    ldx_entry(gyp, offs, O0);
+    ldx_entry(gyp, offs, L7);
+    ldx_entry(gyp, offs, L6);
+    ldx_entry(gyp, offs, L5);
+    ldx_entry(gyp, offs, L4);
+    ldx_entry(gyp, offs, L3);
+    ldx_entry(gyp, offs, L2);
+    ldx_entry(gyp, offs, L1);
+    ldx_entry(gyp, offs, L0);
+
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+
+    Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
+    Label L_mpmul_restore_1, L_mpmul_restore_0;
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(addr);
+    __ add(addr, 16, addr);
+    __ jmp(addr, disp);
+    __ delayed()->clr(offs);
+
+    mpmul_entry(31, L_mpmul_restore_0);
+    mpmul_entry(30, L_mpmul_restore_0);
+    mpmul_entry(29, L_mpmul_restore_0);
+    mpmul_entry(28, L_mpmul_restore_0);
+    mpmul_entry(27, L_mpmul_restore_1);
+    mpmul_entry(26, L_mpmul_restore_1);
+    mpmul_entry(25, L_mpmul_restore_1);
+    mpmul_entry(24, L_mpmul_restore_1);
+    mpmul_entry(23, L_mpmul_restore_1);
+    mpmul_entry(22, L_mpmul_restore_1);
+    mpmul_entry(21, L_mpmul_restore_1);
+    mpmul_entry(20, L_mpmul_restore_2);
+    mpmul_entry(19, L_mpmul_restore_2);
+    mpmul_entry(18, L_mpmul_restore_2);
+    mpmul_entry(17, L_mpmul_restore_2);
+    mpmul_entry(16, L_mpmul_restore_2);
+    mpmul_entry(15, L_mpmul_restore_2);
+    mpmul_entry(14, L_mpmul_restore_2);
+    mpmul_entry(13, L_mpmul_restore_3);
+    mpmul_entry(12, L_mpmul_restore_3);
+    mpmul_entry(11, L_mpmul_restore_3);
+    mpmul_entry(10, L_mpmul_restore_3);
+    mpmul_entry( 9, L_mpmul_restore_3);
+    mpmul_entry( 8, L_mpmul_restore_3);
+    mpmul_entry( 7, L_mpmul_restore_3);
+    mpmul_entry( 6, L_mpmul_restore_4);
+    mpmul_entry( 5, L_mpmul_restore_4);
+    mpmul_entry( 4, L_mpmul_restore_4);
+    mpmul_entry( 3, L_mpmul_restore_4);
+    mpmul_entry( 2, L_mpmul_restore_4);
+    mpmul_entry( 1, L_mpmul_restore_4);
+    mpmul_entry( 0, L_mpmul_restore_4);
+
+    Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
+    Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
+    Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
+    Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
+
+    Label L_zst_base;    // Store sequence base address.
+    __ bind(L_zst_base);
+
+    stx_entry(L_z31, L7, L6, gzp, offs);
+    stx_entry(L_z30, L5, L4, gzp, offs);
+    stx_entry(L_z29, L3, L2, gzp, offs);
+    stx_entry(L_z28, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z27, O5, O4, gzp, offs);
+    stx_entry(L_z26, O3, O2, gzp, offs);
+    stx_entry(L_z25, O1, O0, gzp, offs);
+    stx_entry(L_z24, L7, L6, gzp, offs);
+    stx_entry(L_z23, L5, L4, gzp, offs);
+    stx_entry(L_z22, L3, L2, gzp, offs);
+    stx_entry(L_z21, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z20, O5, O4, gzp, offs);
+    stx_entry(L_z19, O3, O2, gzp, offs);
+    stx_entry(L_z18, O1, O0, gzp, offs);
+    stx_entry(L_z17, L7, L6, gzp, offs);
+    stx_entry(L_z16, L5, L4, gzp, offs);
+    stx_entry(L_z15, L3, L2, gzp, offs);
+    stx_entry(L_z14, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z13, O5, O4, gzp, offs);
+    stx_entry(L_z12, O3, O2, gzp, offs);
+    stx_entry(L_z11, O1, O0, gzp, offs);
+    stx_entry(L_z10, L7, L6, gzp, offs);
+    stx_entry(L_z09, L5, L4, gzp, offs);
+    stx_entry(L_z08, L3, L2, gzp, offs);
+    stx_entry(L_z07, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z06, O5, O4, gzp, offs);
+    stx_entry(L_z05, O3, O2, gzp, offs);
+    stx_entry(L_z04, O1, O0, gzp, offs);
+    stx_entry(L_z03, L7, L6, gzp, offs);
+    stx_entry(L_z02, L5, L4, gzp, offs);
+    stx_entry(L_z01, L3, L2, gzp, offs);
+    stx_entry(L_z00, L1, L0, gzp, offs);
+
+    __ restore();
+    __ restore();
+    // Exit out of 'mpmul' routine, back to multiplyToLen.
+    __ ba_short(L_exit);
+
+    Label L_zst_offs;
+    __ bind(L_zst_offs);
+
+    offs_entry(L_z31, L_zst_base);  // index 31: 2048x2048
+    offs_entry(L_z30, L_zst_base);
+    offs_entry(L_z29, L_zst_base);
+    offs_entry(L_z28, L_zst_base);
+    offs_entry(L_z27, L_zst_base);
+    offs_entry(L_z26, L_zst_base);
+    offs_entry(L_z25, L_zst_base);
+    offs_entry(L_z24, L_zst_base);
+    offs_entry(L_z23, L_zst_base);
+    offs_entry(L_z22, L_zst_base);
+    offs_entry(L_z21, L_zst_base);
+    offs_entry(L_z20, L_zst_base);
+    offs_entry(L_z19, L_zst_base);
+    offs_entry(L_z18, L_zst_base);
+    offs_entry(L_z17, L_zst_base);
+    offs_entry(L_z16, L_zst_base);
+    offs_entry(L_z15, L_zst_base);
+    offs_entry(L_z14, L_zst_base);
+    offs_entry(L_z13, L_zst_base);
+    offs_entry(L_z12, L_zst_base);
+    offs_entry(L_z11, L_zst_base);
+    offs_entry(L_z10, L_zst_base);
+    offs_entry(L_z09, L_zst_base);
+    offs_entry(L_z08, L_zst_base);
+    offs_entry(L_z07, L_zst_base);
+    offs_entry(L_z06, L_zst_base);
+    offs_entry(L_z05, L_zst_base);
+    offs_entry(L_z04, L_zst_base);
+    offs_entry(L_z03, L_zst_base);
+    offs_entry(L_z02, L_zst_base);
+    offs_entry(L_z01, L_zst_base);
+    offs_entry(L_z00, L_zst_base);  // index  0:   64x64
+
+    __ bind(L_mpmul_restore_4);
+    __ restore();
+    __ bind(L_mpmul_restore_3);
+    __ restore();
+    __ bind(L_mpmul_restore_2);
+    __ restore();
+    __ bind(L_mpmul_restore_1);
+    __ restore();
+    __ bind(L_mpmul_restore_0);
+
+    // Dispatch via offset vector entry, into z-store sequence.
+    Label L_zst_rdpc;
+    __ bind(L_zst_rdpc);
+
+    assert(L_zst_base.is_bound(), "must be");
+    assert(L_zst_offs.is_bound(), "must be");
+    assert(L_zst_rdpc.is_bound(), "must be");
+
+    int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
+    int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();
+
+    temp = gyp;   // Alright to reuse 'gyp'.
+
+    __ rdpc(addr);
+    __ sub(addr, doffs, temp);
+    __ srlx(disp, 1, disp);
+    __ lduw(temp, disp, offs);
+    __ sub(addr, dbase, temp);
+    __ jmp(temp, offs);
+    __ delayed()->clr(offs);
+  }
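+  // The tail of gen_mult_mpmul() above dispatches twice: 'disp >> 1' indexes
+  // the 32-entry table of 4-byte offsets emitted by offs_entry(), and the
+  // loaded offset is added to the base of the store sequence (L_zst_base), so
+  // the jump lands at the L_zNN entry matching the selected 'mpmul' size and
+  // only the double-words actually produced are written back to the z-vector.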
+
+  void gen_mult_64x64(Register xp, Register xn,
+                      Register yp, Register yn,
+                      Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for immediate use.
+
+    const Register ri = L0;     // Outer loop index, xv[i]
+    const Register rj = L1;     // Inner loop index, yv[j]
+    const Register rk = L2;     // Output loop index, zv[k]
+    const Register rx = L4;     // x-vector datum [i]
+    const Register ry = L5;     // y-vector datum [j]
+    const Register rz = L6;     // z-vector datum [k]
+    const Register rc = L7;     // carry over (to z-vector datum [k-1])
+
+    const Register lop = O0;    // lo-64b product
+    const Register hip = O1;    // hi-64b product
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ srlx(xn, 1, xn);         // index for u32 to u64 ditto
+    __ srlx(yn, 1, yn);         // index for u32 to u64 ditto
+    __ srlx(zn, 1, zn);         // index for u32 to u64 ditto
+    __ dec(xn);                 // Adjust [0..(N/2)-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u64 c = 0
+    __ sllx(xn, 3, ri);         // int i = xn (byte offset i = 8*xn)
+    __ sllx(yn, 3, rj);         // int j = yn (byte offset j = 8*yn)
+    __ sllx(zn, 3, rk);         // int k = zn (byte offset k = 8*zn)
+    __ ldx(yp, rj, ry);         // u64 y = yp[yn]
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ ldx(xp, ri, rx);         // x = xp[i]
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rc, lop, lop);     // Accumulate lower order bits (producing carry)
+    __ addxc(hip, zero, rc);    // carry over to next datum [k-1]
+    __ stx(lop, zp, rk);        // z[k] = lop
+    __ dec(rk, 8);              // k--
+    __ dec(ri, 8);              // i--
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ stx(rc, zp, rk);         // z[k] = c
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 3, rj);         // int j = yn - 1 (byte offset j = 8*yn)
+    __ dec(rj, 8);
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(rj, 0,  // j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u64 c = 0
+    __ ldx(yp, rj, ry);         // u64 y = yp[j]
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ dec(zn);                 // --zn
+    __ sllx(xn, 3, ri);         // int i = xn (byte offset i = 8*xn)
+    __ sllx(zn, 3, rk);         // int k = zn (byte offset k = 8*zn)
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ ldx(xp, ri, rx);         // x = xp[i]
+    __ ldx(zp, rk, rz);         // z = zp[k], accumulator
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rz, rc, rz);       // Accumulate lower order bits,
+    __ addxc(hip, zero, rc);    // Accumulate higher order bits to carry
+    __ addcc(rz, lop, rz);      //    z += lo(p) + c
+    __ addxc(rc, zero, rc);
+    __ stx(rz, zp, rk);         // zp[k] = z
+    __ dec(rk, 8);              // k--
+    __ dec(ri, 8);              // i--
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ stx(rc, zp, rk);         // z[k] = c
+    __ dec(rj, 8);              // j--
+    __ ba_short(L_loop_j);
+  }
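+  // The 64x64 inner step forms the full 128-bit product with the VIS3 pair
+  // mulx/umulxhi and folds in the running carry via addcc/addxc; conceptually,
+  // for the first (non-accumulating) loop (mulhi_u64 here is just shorthand
+  // for the upper 64 bits of the product):
+  //
+  //   uint64_t lo = x * y;              // mulx
+  //   uint64_t hi = mulhi_u64(x, y);    // umulxhi
+  //   lo  += c;                         // addcc  (sets the carry flag)
+  //   c    = hi + carry_flag;           // addxc  (consumes the carry flag)
+  //   z[k] = lo;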
+
+  void gen_mult_64x64_unaligned(Register xp, Register xn,
+                                Register yp, Register yn,
+                                Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for use.
+
+    const Register xpc = L0;    // Outer loop cursor, xp[i]
+    const Register ypc = L1;    // Inner loop cursor, yp[j]
+    const Register zpc = L2;    // Output loop cursor, zp[k]
+    const Register rx  = L4;    // x-vector datum [i]
+    const Register ry  = L5;    // y-vector datum [j]
+    const Register rz  = L6;    // z-vector datum [k]
+    const Register rc  = L7;    // carry over (to z-vector datum [k-1])
+    const Register rt  = O2;
+
+    const Register lop = O0;    // lo-64b product
+    const Register hip = O1;    // hi-64b product
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ srlx(xn, 1, xn);         // index for u32 to u64 ditto
+    __ srlx(yn, 1, yn);         // index for u32 to u64 ditto
+    __ srlx(zn, 1, zn);         // index for u32 to u64 ditto
+    __ dec(xn);                 // Adjust [0..(N/2)-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u64 c = 0
+    __ sllx(xn, 3, xpc);        // u32* xpc = &xp[xn] (byte offset 8*xn)
+    __ add(xp, xpc, xpc);
+    __ sllx(yn, 3, ypc);        // u32* ypc = &yp[yn] (byte offset 8*yn)
+    __ add(yp, ypc, ypc);
+    __ sllx(zn, 3, zpc);        // u32* zpc = &zp[zn] (byte offset 8*zn)
+    __ add(zp, zpc, zpc);
+    __ lduw(ypc, 0, rt);        // u64 y = yp[yn]
+    __ lduw(ypc, 4, ry);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, ry, ry);
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(xpc, xp,// i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ lduw(xpc, 0, rt);        // u64 x = xp[i]
+    __ lduw(xpc, 4, rx);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rx, rx);
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rc, lop, lop);     // Accumulate lower order bits (producing carry)
+    __ addxc(hip, zero, rc);    // carry over to next datum [k-1]
+    __ srlx(lop, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = lop
+    __ stw(lop, zpc, 4);        //   ...
+    __ dec(zpc, 8);             // k-- (zpc--)
+    __ dec(xpc, 8);             // i-- (xpc--)
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ srlx(rc, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = c
+    __ stw(rc, zpc, 4);
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 3, ypc);        // u32* ypc = &yp[yn] (byte offset 8*yn)
+    __ add(yp, ypc, ypc);
+    __ dec(ypc, 8);             // yn - 1 (ypc--)
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(ypc, yp,// j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u64 c = 0
+    __ lduw(ypc, 0, rt);        // u64 y = yp[j] (= *ypc)
+    __ lduw(ypc, 4, ry);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, ry, ry);
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ sllx(xn, 3, xpc);        // u32* xpc = &xp[xn] (byte offset 8*xn)
+    __ add(xp, xpc, xpc);
+    __ dec(zn);                 // --zn
+    __ sllx(zn, 3, zpc);        // u32* zpc = &zp[zn] (byte offset 8*zn)
+    __ add(zp, zpc, zpc);
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(xpc, xp,// i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ lduw(xpc, 0, rt);        // u64 x = xp[i] (= *xpc)
+    __ lduw(xpc, 4, rx);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rx, rx);
+
+    __ lduw(zpc, 0, rt);        // u64 z = zp[k] (= *zpc)
+    __ lduw(zpc, 4, rz);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rz, rz);
+
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rz, rc, rz);       // Accumulate lower order bits...
+    __ addxc(hip, zero, rc);    // Accumulate higher order bits to carry
+    __ addcc(rz, lop, rz);      // ... z += lo(p) + c
+    __ addxccc(rc, zero, rc);
+    __ srlx(rz, 32, rt);
+    __ stw(rt, zpc, 0);         // zp[k] = z    (*zpc = z)
+    __ stw(rz, zpc, 4);
+    __ dec(zpc, 8);             // k-- (zpc--)
+    __ dec(xpc, 8);             // i-- (xpc--)
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ srlx(rc, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = c
+    __ stw(rc, zpc, 4);
+    __ dec(ypc, 8);             // j-- (ypc--)
+    __ ba_short(L_loop_j);
+  }
+
+  void gen_mult_32x32(Register xp, Register xn,
+                      Register yp, Register yn,
+                      Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for use.
+
+    const Register ri = L0;     // Outer loop index, xv[i]
+    const Register rj = L1;     // Inner loop index, yv[j]
+    const Register rk = L2;     // Output loop index, zv[k]
+    const Register rx = L4;     // x-vector datum [i]
+    const Register ry = L5;     // y-vector datum [j]
+    const Register rz = L6;     // z-vector datum [k]
+    const Register rc = L7;     // carry over (to z-vector datum [k-1])
+
+    const Register p64 = O0;    // 64b product
+    const Register z65 = O1;    // carry+64b accumulator
+    const Register c65 = O2;    // carry at bit 65
+    const Register c33 = O2;    // carry at bit 33 (after shift)
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ dec(xn);                 // Adjust [0..N-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u32 c = 0
+    __ sllx(xn, 2, ri);         // int i = xn (byte offset i = 4*xn)
+    __ sllx(yn, 2, rj);         // int j = yn (byte offset j = 4*yn)
+    __ sllx(zn, 2, rk);         // int k = zn (byte offset k = 4*zn)
+    __ lduw(yp, rj, ry);        // u32 y = yp[yn]
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ lduw(xp, ri, rx);        // x = xp[i]
+    __ mulx(rx, ry, p64);       // 64b result of 32x32
+    __ addcc(rc, p64, z65);     // Accumulate to 65 bits (producing carry)
+    __ addxc(zero, zero, c65);  // Materialise carry (in bit 65) into lsb,
+    __ sllx(c65, 32, c33);      // and shift into bit 33
+    __ srlx(z65, 32, rc);       // carry = c33 | hi(z65) >> 32
+    __ add(c33, rc, rc);        // carry over to next datum [k-1]
+    __ stw(z65, zp, rk);        // z[k] = lo(z65)
+    __ dec(rk, 4);              // k--
+    __ dec(ri, 4);              // i--
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ stw(rc, zp, rk);         // z[k] = c
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 2, rj);         // int j = yn - 1 (byte offset j = 4*yn)
+    __ dec(rj, 4);
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(rj, 0,  // j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u32 c = 0
+    __ lduw(yp, rj, ry);        // u32 y = yp[j]
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ dec(zn);                 // --zn
+    __ sllx(xn, 2, ri);         // int i = xn (byte offset i = 4*xn)
+    __ sllx(zn, 2, rk);         // int k = zn (byte offset k = 4*zn)
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ lduw(xp, ri, rx);        // x = xp[i]
+    __ lduw(zp, rk, rz);        // z = zp[k], accumulator
+    __ mulx(rx, ry, p64);       // 64b result of 32x32
+    __ add(rz, rc, rz);         // Accumulate lower order bits,
+    __ addcc(rz, p64, z65);     //   z += lo(p64) + c
+    __ addxc(zero, zero, c65);  // Materialise carry (in bit 65) into lsb,
+    __ sllx(c65, 32, c33);      // and shift into bit 33
+    __ srlx(z65, 32, rc);       // carry = c33 | hi(z65) >> 32
+    __ add(c33, rc, rc);        // carry over to next datum [k-1]
+    __ stw(z65, zp, rk);        // zp[k] = lo(z65)
+    __ dec(rk, 4);              // k--
+    __ dec(ri, 4);              // i--
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ stw(rc, zp, rk);         // z[k] = c
+    __ dec(rj, 4);              // j--
+    __ ba_short(L_loop_j);
+  }
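+  // In the 32x32 kernels the (up to) 65-bit sum c + x*y is kept as the 64-bit
+  // value z65 plus the carry-out bit c65; shifting c65 up by 32 and adding the
+  // high word of z65 gives the carry into the next, more significant limb,
+  // while the low word of z65 is what gets stored as z[k].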
+
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -5073,9 +5839,15 @@
     if (UseAdler32Intrinsics) {
       StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
     }
+
+#ifdef COMPILER2
+    // Intrinsics supported by C2 only:
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+#endif // COMPILER2
   }
 
-
  public:
   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
     // replace the standard masm with a special one:
--- a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -41,7 +41,7 @@
 enum /* platform_dependent_constants */ {
   // %%%%%%%% May be able to shrink this a lot
   code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 27000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 29000            // simply increase if too small (assembler will crash if too small)
 };
 
 class Sparc {
--- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2049,6 +2049,7 @@
     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ ld_ptr( Robj, mirror_offset, Robj);
+    __ resolve_oop_handle(Robj);
   }
 }
 
--- a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -101,6 +101,14 @@
   declare_constant(VM_Version::ISA_XMONT)               \
   declare_constant(VM_Version::ISA_PAUSE_NSEC)          \
   declare_constant(VM_Version::ISA_VAMASK)              \
+  declare_constant(VM_Version::ISA_SPARC6)              \
+  declare_constant(VM_Version::ISA_DICTUNP)             \
+  declare_constant(VM_Version::ISA_FPCMPSHL)            \
+  declare_constant(VM_Version::ISA_RLE)                 \
+  declare_constant(VM_Version::ISA_SHA3)                \
+  declare_constant(VM_Version::ISA_VIS3C)               \
+  declare_constant(VM_Version::ISA_SPARC5B)             \
+  declare_constant(VM_Version::ISA_MME)                 \
   declare_constant(VM_Version::CPU_FAST_IDIV)           \
   declare_constant(VM_Version::CPU_FAST_RDPC)           \
   declare_constant(VM_Version::CPU_FAST_BIS)            \
--- a/src/hotspot/cpu/sparc/vm_version_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -103,7 +103,7 @@
       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
     }
     else if (has_sparc5()) {
-      // Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
+      // Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
       // also use prefetch style 3.
       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@@ -128,7 +128,7 @@
 
   // We increase the number of prefetched cache lines, to use just a bit more
   // aggressive approach, when the L2-cache line size is small (32 bytes), or
-  // when running on newer processor implementations, such as the Core S4.
+  // when running on newer processor implementations, such as the Core C4.
   bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());
 
   if (inc_prefetch) {
@@ -168,6 +168,16 @@
     FLAG_SET_DEFAULT(UseCBCond, false);
   }
 
+  // Use 'mpmul' instruction if available.
+  if (has_mpmul()) {
+    if (FLAG_IS_DEFAULT(UseMPMUL)) {
+      FLAG_SET_DEFAULT(UseMPMUL, true);
+    }
+  } else if (UseMPMUL) {
+    warning("MPMUL instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseMPMUL, false);
+  }
+
   assert(BlockZeroingLowLimit > 0, "invalid value");
 
   if (has_blk_zeroing() && cache_line_size > 0) {
@@ -208,7 +218,9 @@
 
   char buf[512];
   jio_snprintf(buf, sizeof(buf),
-               "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
+               "%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
+               "%s%s%s%s%s%s%s",
                (has_v9()          ? "v9" : ""),
                (has_popc()        ? ", popc" : ""),
                (has_vis1()        ? ", vis1" : ""),
@@ -241,6 +253,16 @@
                (has_pause_nsec()  ? ", pause_nsec" : ""),
                (has_vamask()      ? ", vamask" : ""),
 
+               (has_sparc6()      ? ", sparc6" : ""),
+               (has_dictunp()     ? ", dictunp" : ""),
+               (has_fpcmpshl()    ? ", fpcmpshl" : ""),
+               (has_rle()         ? ", rle" : ""),
+               (has_sha3()        ? ", sha3" : ""),
+               (has_athena_plus2()? ", athena_plus2" : ""),
+               (has_vis3c()       ? ", vis3c" : ""),
+               (has_sparc5b()     ? ", sparc5b" : ""),
+               (has_mme()         ? ", mme" : ""),
+
                (has_fast_idiv()   ? ", *idiv" : ""),
                (has_fast_rdpc()   ? ", *rdpc" : ""),
                (has_fast_bis()    ? ", *bis" : ""),
@@ -409,6 +431,15 @@
     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
   }
 
+  if (UseVIS > 2) {
+    if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+      FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+    }
+  } else if (UseMultiplyToLenIntrinsic) {
+    warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
+  }
+
   if (UseVectorizedMismatchIntrinsic) {
     warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
--- a/src/hotspot/cpu/sparc/vm_version_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -67,6 +67,16 @@
     ISA_PAUSE_NSEC,
     ISA_VAMASK,
 
+    ISA_SPARC6,
+    ISA_DICTUNP,
+    ISA_FPCMPSHL,
+    ISA_RLE,
+    ISA_SHA3,
+    ISA_FJATHPLUS2,
+    ISA_VIS3C,
+    ISA_SPARC5B,
+    ISA_MME,
+
     // Synthesised properties:
 
     CPU_FAST_IDIV,
@@ -79,7 +89,7 @@
   };
 
 private:
-  enum { ISA_last_feature = ISA_VAMASK,
+  enum { ISA_last_feature = ISA_MME,
          CPU_last_feature = CPU_BLK_ZEROING };
 
   enum {
@@ -119,6 +129,16 @@
     ISA_pause_nsec_msk  = UINT64_C(1) << ISA_PAUSE_NSEC,
     ISA_vamask_msk      = UINT64_C(1) << ISA_VAMASK,
 
+    ISA_sparc6_msk      = UINT64_C(1) << ISA_SPARC6,
+    ISA_dictunp_msk     = UINT64_C(1) << ISA_DICTUNP,
+    ISA_fpcmpshl_msk    = UINT64_C(1) << ISA_FPCMPSHL,
+    ISA_rle_msk         = UINT64_C(1) << ISA_RLE,
+    ISA_sha3_msk        = UINT64_C(1) << ISA_SHA3,
+    ISA_fjathplus2_msk  = UINT64_C(1) << ISA_FJATHPLUS2,
+    ISA_vis3c_msk       = UINT64_C(1) << ISA_VIS3C,
+    ISA_sparc5b_msk     = UINT64_C(1) << ISA_SPARC5B,
+    ISA_mme_msk         = UINT64_C(1) << ISA_MME,
+
     CPU_fast_idiv_msk   = UINT64_C(1) << CPU_FAST_IDIV,
     CPU_fast_rdpc_msk   = UINT64_C(1) << CPU_FAST_RDPC,
     CPU_fast_bis_msk    = UINT64_C(1) << CPU_FAST_BIS,
@@ -153,40 +173,51 @@
  *  UltraSPARC T2+:    (Victoria Falls, etc.)
  *    SPARC-V9, VIS, VIS2, ASI_BIS, POPC    (Crypto/hash in SPU)
  *
- *  UltraSPARC T3:     (Rainbow Falls/S2)
+ *  UltraSPARC T3:     (Rainbow Falls/C2)
  *    SPARC-V9, VIS, VIS2, ASI_BIS, POPC    (Crypto/hash in SPU)
  *
- *  Oracle SPARC T4/T5/M5:  (Core S3)
+ *  Oracle SPARC T4/T5/M5:  (Core C3)
  *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
  *    AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
  *
- *  Oracle SPARC M7:   (Core S4)
+ *  Oracle SPARC M7:   (Core C4)
  *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
  *    AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
  *    ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
  *
+ *  Oracle SPARC M8:   (Core C5)
+ *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
+ *    AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
+ *    ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
+ *    DICTUNP, RLE, SHA3, MME
+ *
+ *    NOTE: Oracle Number support ignored.
  */
   enum {
     niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
     niagara2_msk = niagara1_msk | ISA_popc_msk,
 
-    core_S2_msk  = niagara2_msk | ISA_vis2_msk,
+    core_C2_msk  = niagara2_msk | ISA_vis2_msk,
 
-    core_S3_msk  = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
+    core_C3_msk  = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
         ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
         ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
         ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
         ISA_cbcond_msk | ISA_crc32c_msk,
 
-    core_S4_msk  = core_S3_msk - ISA_kasumi_msk |
+    core_C4_msk  = core_C3_msk - ISA_kasumi_msk |
         ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
         ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,
 
+    core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
+        ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
+
     ultra_sparc_t1_msk = niagara1_msk,
     ultra_sparc_t2_msk = niagara2_msk,
-    ultra_sparc_t3_msk = core_S2_msk,
-    ultra_sparc_m5_msk = core_S3_msk,   // NOTE: First out-of-order pipeline.
-    ultra_sparc_m7_msk = core_S4_msk
+    ultra_sparc_t3_msk = core_C2_msk,
+    ultra_sparc_m5_msk = core_C3_msk,   // NOTE: First out-of-order pipeline.
+    ultra_sparc_m7_msk = core_C4_msk,
+    ultra_sparc_m8_msk = core_C5_msk
   };
 
   static uint _L2_data_cache_line_size;
@@ -247,6 +278,16 @@
   static bool has_pause_nsec()   { return (_features & ISA_pause_nsec_msk) != 0; }
   static bool has_vamask()       { return (_features & ISA_vamask_msk) != 0; }
 
+  static bool has_sparc6()       { return (_features & ISA_sparc6_msk) != 0; }
+  static bool has_dictunp()      { return (_features & ISA_dictunp_msk) != 0; }
+  static bool has_fpcmpshl()     { return (_features & ISA_fpcmpshl_msk) != 0; }
+  static bool has_rle()          { return (_features & ISA_rle_msk) != 0; }
+  static bool has_sha3()         { return (_features & ISA_sha3_msk) != 0; }
+  static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
+  static bool has_vis3c()        { return (_features & ISA_vis3c_msk) != 0; }
+  static bool has_sparc5b()      { return (_features & ISA_sparc5b_msk) != 0; }
+  static bool has_mme()          { return (_features & ISA_mme_msk) != 0; }
+
   static bool has_fast_idiv()    { return (_features & CPU_fast_idiv_msk) != 0; }
   static bool has_fast_rdpc()    { return (_features & CPU_fast_rdpc_msk) != 0; }
   static bool has_fast_bis()     { return (_features & CPU_fast_bis_msk) != 0; }
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -6617,6 +6617,7 @@
   movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
   movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
   movptr(mirror, Address(mirror, mirror_offset));
+  resolve_oop_handle(mirror);
 }
 
 void MacroAssembler::load_klass(Register dst, Register src) {
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2665,6 +2665,7 @@
                                     ConstantPoolCacheEntry::f1_offset())));
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movptr(obj, Address(obj, mirror_offset));
+    __ resolve_oop_handle(obj);
   }
 }
 
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -46,7 +46,7 @@
 address VM_Version::_cpuinfo_cont_addr = 0;
 
 static BufferBlob* stub_blob;
-static const int stub_size = 1000;
+static const int stub_size = 1100;
 
 extern "C" {
   typedef void (*get_cpu_info_stub_t)(void*);
@@ -70,7 +70,7 @@
     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 
     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 
     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@
     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
     __ jcc(Assembler::belowEqual, done);
     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ jcc(Assembler::belowEqual, ext_cpuid1);
     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid7);
+    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid8);
+    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
+    __ jccb(Assembler::below, ext_cpuid8);
+    //
+    // Extended cpuid(0x8000001E)
+    //
+    __ movl(rax, 0x8000001E);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
     //
     // Extended cpuid(0x80000008)
     //
+    __ bind(ext_cpuid8);
     __ movl(rax, 0x80000008);
     __ cpuid();
     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@
     }
 
 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
+    }
   }
 
   if( is_intel() ) { // Intel cpus specific settings
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -228,6 +228,15 @@
     } bits;
   };
 
+  union ExtCpuid1EEbx {
+    uint32_t value;
+    struct {
+      uint32_t                  : 8,
+               threads_per_core : 8,
+                                : 16;
+    } bits;
+  };
+
   union XemXcr0Eax {
     uint32_t value;
     struct {
@@ -398,6 +407,12 @@
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved
 
+    // cpuid function 0x8000001E // AMD 17h
+    uint32_t      ext_cpuid1E_eax;
+    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
+    uint32_t      ext_cpuid1E_ecx;
+    uint32_t      ext_cpuid1E_edx; // unused currently
+
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;
 
     // AMD features.
     if (is_amd()) {
@@ -518,16 +541,8 @@
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
   static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
+      if (cpu_family() >= 0x17) {
+        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+      } else {
+        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+                 cores_per_cpu();
+      }
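+      // On AMD family 17h, cpuid leaf 0x8000001E reports threads-per-core
+      // minus one in EBX[15:8], so an SMT-enabled part reports 1 here and the
+      // "+ 1" above yields 2 logical threads per core.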
     }
     return (result == 0 ? 1 : result);
   }
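
Editor's note: a minimal, self-contained sketch of the AMD family-17h topology computation the hunk above introduces. It assumes only what the diff shows: CPUID leaf 0x8000001E returns, in EBX bits 15:8, the number of hardware threads per core minus one, and it relies (as HotSpot does) on LSB-first bit-field allocation. The helper name and the hard-coded EBX value are illustrative, not HotSpot API.

#include <cassert>
#include <cstdint>

// Mirrors the ExtCpuid1EEbx layout added above: bits 7:0 reserved,
// bits 15:8 hold (threads per core - 1), bits 31:16 reserved.
union ExtCpuid1EEbx {
  uint32_t value;
  struct {
    uint32_t                  : 8,
             threads_per_core : 8,
                              : 16;
  } bits;
};

// Hypothetical stand-in for EBX of CPUID leaf 0x8000001E on a family-17h part
// with SMT enabled (field value 1 means 2 hardware threads per core).
static uint32_t fake_cpuid_1E_ebx() { return 0x00000100u; }

int main() {
  ExtCpuid1EEbx ebx;
  ebx.value = fake_cpuid_1E_ebx();
  // Same "+ 1" applied by threads_per_core() in the hunk above.
  unsigned threads = ebx.bits.threads_per_core + 1;
  assert(threads == 2);
  return 0;
}
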
--- a/src/hotspot/os/aix/os_aix.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/aix/os_aix.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -770,8 +770,15 @@
   const pthread_t pthread_id = ::pthread_self();
   const tid_t kernel_thread_id = ::thread_self();
 
-  log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
-    os::current_thread_id(), (uintx) kernel_thread_id);
+  LogTarget(Info, os, thread) lt;
+  if (lt.is_enabled()) {
+    address low_address = thread->stack_end();
+    address high_address = thread->stack_base();
+    lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
+             ", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
+             os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
+             (high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
+  }
 
   // Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
   // by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@@ -864,6 +871,14 @@
   // Calculate stack size if it's not specified by caller.
   size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
 
+  // JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
+  // the real thread stack size may be smaller than the requested stack size, by as much as 64K.
+  // This very much looks like a pthread lib error. As a workaround, increase the stack size
+  // by 64K for small thread stacks (arbitrarily chosen to be < 4MB).
+  if (stack_size < 4096 * K) {
+    stack_size += 64 * K;
+  }
+
   // On Aix, pthread_attr_setstacksize fails with huge values and leaves the
   // thread size in attr unchanged. If this is the minimal stack size as set
   // by pthread_attr_init this leads to crashes after thread creation. E.g. the
@@ -3443,8 +3458,6 @@
 
   init_random(1234567);
 
-  ThreadCritical::initialize();
-
   // Main_thread points to the aboriginal thread.
   Aix::_main_thread = pthread_self();
 
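
Editor's note: the JDK-8187028 workaround above is a pure function of the requested size, so it can be restated and checked outside HotSpot. A small sketch under that assumption follows; the function name is illustrative and K = 1024 follows HotSpot's convention.

#include <cassert>
#include <cstddef>

static const size_t K = 1024;

// Pads small requested stack sizes (< 4 MB) by 64 KB, as the os_aix.cpp hunk does,
// to compensate for the observed pthread stack shortfall on 4K-backed stacks.
static size_t padded_stack_size(size_t stack_size) {
  if (stack_size < 4096 * K) {
    stack_size += 64 * K;
  }
  return stack_size;
}

int main() {
  assert(padded_stack_size(512 * K) == 576 * K);    // small stack: padded
  assert(padded_stack_size(8192 * K) == 8192 * K);  // large stack: unchanged
  return 0;
}
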
--- a/src/hotspot/os/aix/threadCritical_aix.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/aix/threadCritical_aix.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -38,12 +38,6 @@
 static pthread_mutex_t       tc_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int                   tc_count = 0;
 
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
 ThreadCritical::ThreadCritical() {
   pthread_t self = pthread_self();
   if (self != tc_owner) {
--- a/src/hotspot/os/bsd/os_bsd.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/bsd/os_bsd.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -3353,8 +3353,6 @@
 
   init_random(1234567);
 
-  ThreadCritical::initialize();
-
   Bsd::set_page_size(getpagesize());
   if (Bsd::page_size() == -1) {
     fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno));
--- a/src/hotspot/os/bsd/threadCritical_bsd.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/bsd/threadCritical_bsd.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -37,12 +37,6 @@
 static pthread_mutex_t       tc_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int                   tc_count = 0;
 
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
 ThreadCritical::ThreadCritical() {
   pthread_t self = pthread_self();
   if (self != tc_owner) {
--- a/src/hotspot/os/linux/os_linux.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/linux/os_linux.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -4768,8 +4768,6 @@
 
   init_random(1234567);
 
-  ThreadCritical::initialize();
-
   Linux::set_page_size(sysconf(_SC_PAGESIZE));
   if (Linux::page_size() == -1) {
     fatal("os_linux.cpp: os::init: sysconf failed (%s)",
--- a/src/hotspot/os/linux/threadCritical_linux.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/linux/threadCritical_linux.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -37,12 +37,6 @@
 static pthread_mutex_t       tc_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int                   tc_count = 0;
 
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
 ThreadCritical::ThreadCritical() {
   pthread_t self = pthread_self();
   if (self != tc_owner) {
--- a/src/hotspot/os/posix/os_posix.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/posix/os_posix.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1770,6 +1770,12 @@
 
   if (v == 0) { // Do this the hard way by blocking ...
     struct timespec abst;
+    // We have to watch for overflow when converting millis to nanos,
+    // but if millis is that large then we will end up limiting to
+    // MAX_SECS anyway, so just do that here.
+    if (millis / MILLIUNITS > MAX_SECS) {
+      millis = jlong(MAX_SECS) * MILLIUNITS;
+    }
     to_abstime(&abst, millis * (NANOUNITS / MILLIUNITS), false);
 
     int ret = OS_TIMEOUT;
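
Editor's note: a hedged illustration of why the clamp above is needed. MILLIUNITS is 1000 and NANOUNITS is 1000000000 in HotSpot, so the subsequent multiplication scales millis by 1,000,000; a 64-bit millis above roughly 9.2e12 would overflow, and any such value already exceeds MAX_SECS. The MAX_SECS value below is illustrative (the real constant is defined elsewhere in os_posix.cpp), and the function name is not HotSpot API.

#include <cassert>
#include <cstdint>

typedef int64_t jlong;

static const jlong MILLIUNITS = 1000;        // ms per second (HotSpot constant)
static const jlong NANOUNITS  = 1000000000;  // ns per second (HotSpot constant)
static const jlong MAX_SECS   = 100000000;   // illustrative cap; real value lives in os_posix.cpp

// Clamp first, exactly as the hunk does, so the ms -> ns conversion cannot overflow.
static jlong millis_to_nanos_clamped(jlong millis) {
  if (millis / MILLIUNITS > MAX_SECS) {
    millis = MAX_SECS * MILLIUNITS;
  }
  return millis * (NANOUNITS / MILLIUNITS);
}

int main() {
  assert(millis_to_nanos_clamped(1500) == 1500000000);              // 1.5 s
  assert(millis_to_nanos_clamped(INT64_MAX) == MAX_SECS * NANOUNITS); // clamped, no overflow
  return 0;
}
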
--- a/src/hotspot/os/solaris/os_solaris.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/solaris/os_solaris.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -4076,6 +4076,7 @@
 int_fnP_cond_tP_i_vP os::Solaris::_cond_init;
 int_fnP_cond_tP os::Solaris::_cond_destroy;
 int os::Solaris::_cond_scope = USYNC_THREAD;
+bool os::Solaris::_synchronization_initialized;
 
 void os::Solaris::synchronization_init() {
   if (UseLWPSynchronization) {
@@ -4125,6 +4126,7 @@
       os::Solaris::set_cond_destroy(::cond_destroy);
     }
   }
+  _synchronization_initialized = true;
 }
 
 bool os::Solaris::liblgrp_init() {
@@ -4198,9 +4200,6 @@
     dladdr1_func = CAST_TO_FN_PTR(dladdr1_func_type, dlsym(hdl, "dladdr1"));
   }
 
-  // (Solaris only) this switches to calls that actually do locking.
-  ThreadCritical::initialize();
-
   main_thread = thr_self();
 
   // dynamic lookup of functions that may not be available in our lowest
--- a/src/hotspot/os/solaris/os_solaris.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/solaris/os_solaris.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -65,6 +65,8 @@
   static int_fnP_cond_tP _cond_destroy;
   static int _cond_scope;
 
+  static bool _synchronization_initialized;
+
   typedef uintptr_t       lgrp_cookie_t;
   typedef id_t            lgrp_id_t;
   typedef int             lgrp_rsrc_t;
@@ -227,6 +229,8 @@
   static void set_cond_destroy(int_fnP_cond_tP func)       { _cond_destroy = func; }
   static void set_cond_scope(int scope)                    { _cond_scope = scope; }
 
+  static bool synchronization_initialized()                { return _synchronization_initialized; }
+
   static void set_lgrp_home(lgrp_home_func_t func) { _lgrp_home = func; }
   static void set_lgrp_init(lgrp_init_func_t func) { _lgrp_init = func; }
   static void set_lgrp_fini(lgrp_fini_func_t func) { _lgrp_fini = func; }
--- a/src/hotspot/os/solaris/threadCritical_solaris.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/solaris/threadCritical_solaris.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -42,10 +42,9 @@
 static  mutex_t  global_mut;
 static  thread_t global_mut_owner = -1;
 static  int      global_mut_count = 0;
-static  bool     initialized = false;
 
 ThreadCritical::ThreadCritical() {
-  if (initialized) {
+  if (os::Solaris::synchronization_initialized()) {
     thread_t owner = thr_self();
     if (global_mut_owner != owner) {
       if (os::Solaris::mutex_lock(&global_mut))
@@ -62,7 +61,7 @@
 }
 
 ThreadCritical::~ThreadCritical() {
-  if (initialized) {
+  if (os::Solaris::synchronization_initialized()) {
     assert(global_mut_owner == thr_self(), "must have correct owner");
     assert(global_mut_count > 0, "must have correct count");
     --global_mut_count;
@@ -75,12 +74,3 @@
     assert (Threads::number_of_threads() == 0, "valid only during initialization");
   }
 }
-
-void ThreadCritical::initialize() {
-  // This method is called at the end of os::init(). Until
-  // then, we don't do real locking.
-  initialized = true;
-}
-
-void ThreadCritical::release() {
-}
--- a/src/hotspot/os/windows/os_windows.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -428,7 +428,7 @@
   // When the VMThread gets here, the main thread may have already exited
   // which frees the CodeHeap containing the Atomic::add code
   if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
-    Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
+    Atomic::dec(&os::win32::_os_thread_count);
   }
 
   // If a thread has not deleted itself ("delete this") as part of its
@@ -634,7 +634,7 @@
     return NULL;
   }
 
-  Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
+  Atomic::inc(&os::win32::_os_thread_count);
 
   // Store info on the Win32 thread into the OSThread
   osthread->set_thread_handle(thread_handle);
--- a/src/hotspot/os/windows/threadCritical_windows.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os/windows/threadCritical_windows.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -51,16 +51,6 @@
 // and found them ~30 times slower than the critical region code.
 //
 
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-  assert(lock_owner == -1, "Mutex being deleted while owned.");
-  assert(lock_count == -1, "Mutex being deleted while recursively locked");
-  assert(lock_event != NULL, "Sanity check");
-  CloseHandle(lock_event);
-}
-
 ThreadCritical::ThreadCritical() {
   DWORD current_thread = GetCurrentThreadId();
 
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -34,22 +34,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 //
 //   machine barrier instructions:
 //
@@ -148,90 +132,15 @@
   return result;
 }
 
-
-inline void Atomic::inc    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
   // Note that xchg_ptr doesn't necessarily do an acquire
   // (see synchronizer.cpp).
 
-  unsigned int old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   __asm__ __volatile__ (
@@ -259,15 +168,18 @@
       "memory"
     );
 
-  return (jint) old_value;
+  return old_value;
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
   // Note that xchg_ptr doesn't necessarily do an acquire
   // (see synchronizer.cpp).
 
-  long old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   __asm__ __volatile__ (
@@ -295,11 +207,7 @@
       "memory"
     );
 
-  return (intptr_t) old_value;
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+  return old_value;
 }
 
 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
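
Editor's note: this and the following atomic_*.hpp files all replace the jint/intptr_t-specific xchg entry points with PlatformXchg function objects specialized on operand size. A minimal, self-contained sketch of that dispatch pattern follows; SizedXchg and sized_xchg are illustrative names, and a GCC __atomic builtin stands in for the per-platform assembly shown in the hunks.

#include <cassert>
#include <cstddef>
#include <cstdint>

template<size_t byte_size>
struct SizedXchg;  // only the supported sizes are specialized

template<>
struct SizedXchg<4> {
  template<typename T>
  T operator()(T exchange_value, T volatile* dest) const {
    static_assert(sizeof(T) == 4, "4-byte specialization");
    return __atomic_exchange_n(dest, exchange_value, __ATOMIC_SEQ_CST);
  }
};

template<>
struct SizedXchg<8> {
  template<typename T>
  T operator()(T exchange_value, T volatile* dest) const {
    static_assert(sizeof(T) == 8, "8-byte specialization");
    return __atomic_exchange_n(dest, exchange_value, __ATOMIC_SEQ_CST);
  }
};

// Front end in the spirit of Atomic::xchg: the operand size picks the specialization.
template<typename T>
T sized_xchg(T exchange_value, T volatile* dest) {
  return SizedXchg<sizeof(T)>()(exchange_value, dest);
}

int main() {
  volatile int32_t a = 1;
  volatile int64_t b = 2;
  int32_t old_a = sized_xchg<int32_t>(5, &a);  // routed to SizedXchg<4>
  int64_t old_b = sized_xchg<int64_t>(9, &b);  // routed to SizedXchg<8>
  assert(old_a == 1 && a == 5);
  assert(old_b == 2 && b == 9);
  return 0;
}
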
--- a/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -78,16 +78,17 @@
 inline void OrderAccess::release()    { inlasm_lwsync(); }
 inline void OrderAccess::fence()      { inlasm_sync();   }
 
-template<> inline jbyte  OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte*  p) { register jbyte t = load(p);  inlasm_acquire_reg(t); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jint   OrderAccess::specialized_load_acquire<jint>  (const volatile jint*   p) { register jint t = load(p);   inlasm_acquire_reg(t); return t; }
-template<> inline jlong  OrderAccess::specialized_load_acquire<jlong> (const volatile jlong*  p) { register jlong t = load(p);  inlasm_acquire_reg(t); return t; }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
+};
 
 #undef inlasm_sync
 #undef inlasm_lwsync
 #undef inlasm_eieio
 #undef inlasm_isync
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -27,19 +27,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@@ -61,25 +48,11 @@
   return old_value;
 }
 
-inline void Atomic::inc    (volatile jint*     dest) {
-  __asm__ volatile (  "lock addl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  __asm__ volatile (  "lock subl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "xchgl (%2),%0"
                     : "=r" (exchange_value)
                     : "0" (exchange_value), "r" (dest)
@@ -87,10 +60,6 @@
   return exchange_value;
 }
 
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
 template<>
 template<typename T>
 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -120,9 +89,6 @@
 }
 
 #ifdef AMD64
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-
 template<>
 template<typename I, typename D>
 inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) const {
@@ -136,21 +102,11 @@
   return old_value;
 }
 
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ (  "lock addq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ (  "lock subq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("xchgq (%2),%0"
                         : "=r" (exchange_value)
                         : "0" (exchange_value), "r" (dest)
@@ -172,22 +128,8 @@
   return exchange_value;
 }
 
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #else // !AMD64
 
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
 extern "C" {
   // defined in bsd_x86.s
   jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);
@@ -204,18 +146,21 @@
   return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
 }
 
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
-  _Atomic_move_long(src, &dest);
-  return dest;
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
 
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
 
 #endif // AMD64
--- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,46 +64,57 @@
 }
 
 template<>
-inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte*  p, jbyte  v) {
-  __asm__ volatile (  "xchgb (%2),%0"
-                    : "=q" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgb (%2),%0"
+                      : "=q" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
 template<>
-inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
-  __asm__ volatile (  "xchgw (%2),%0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgw (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
 template<>
-inline void OrderAccess::specialized_release_store_fence<jint>  (volatile jint*   p, jint   v) {
-  __asm__ volatile (  "xchgl (%2),%0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgl (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
 
 #ifdef AMD64
 template<>
-inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong*  p, jlong  v) {
-  __asm__ volatile (  "xchgq (%2), %0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgq (%2), %0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
 #endif // AMD64
 
-template<>
-inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat*  p, jfloat  v) {
-  release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
-  release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_BSD_X86_VM_ORDERACCESS_BSD_X86_INLINE_HPP
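
Editor's note: the PlatformOrderedStore specializations above keep the existing trick that on x86 an xchg with a memory operand is implicitly locked, so a single instruction provides both the store and the trailing full fence; the separate jfloat/jdouble overloads could be dropped because the size-keyed templates now cover them. Below is a small, x86-only sketch (GCC-style inline assembly copied from the 4-byte case above) showing the same idea outside HotSpot; the function name is illustrative.

#include <cassert>
#include <cstdint>

// Same idea as PlatformOrderedStore<4, RELEASE_X_FENCE>: xchg is implicitly locked
// on x86, so this is a store plus a full memory fence in one instruction.
static void release_store_fence_u32(volatile uint32_t* p, uint32_t v) {
  __asm__ volatile ("xchgl (%2),%0"
                    : "=r" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}

int main() {
  volatile uint32_t flag = 0;
  release_store_fence_u32(&flag, 1);  // publish with full ordering
  assert(flag == 1);
  return 0;
}
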
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -87,7 +87,7 @@
 
 /* Atomically write VALUE into `*PTR' and returns the previous
    contents of `*PTR'.  */
-static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
   for (;;) {
       // Loop until success.
       int prev = *ptr;
@@ -148,7 +148,7 @@
 
 /* Atomically write VALUE into `*PTR' and returns the previous
    contents of `*PTR'.  */
-static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
   for (;;) {
       // Loop until a __kernel_cmpxchg succeeds.
       int prev = *ptr;
@@ -159,20 +159,6 @@
 }
 #endif // ARM
 
-inline void Atomic::store(jint store_value, volatile jint* dest) {
-#if !defined(ARM) && !defined(M68K)
-  __sync_synchronize();
-#endif
-  *dest = store_value;
-}
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
-#if !defined(ARM) && !defined(M68K)
-  __sync_synchronize();
-#endif
-  *dest = store_value;
-}
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -207,42 +193,22 @@
   return __sync_add_and_fetch(dest, add_value);
 }
 
-inline void Atomic::inc(volatile jint* dest) {
-  add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
 #ifdef ARM
-  return arm_lock_test_and_set(dest, exchange_value);
+  return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
 #else
 #ifdef M68K
-  return m68k_lock_test_and_set(dest, exchange_value);
+  return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
 #else
   // __sync_lock_test_and_set is a bizarrely named atomic exchange
   // operation.  Note that some platforms only support this with the
   // limitation that the only valid value to store is the immediate
   // constant 1.  There is a test for this in JNI_CreateJavaVM().
-  jint result = __sync_lock_test_and_set (dest, exchange_value);
+  T result = __sync_lock_test_and_set (dest, exchange_value);
   // All atomic operations are expected to be full memory barriers
   // (see atomic.hpp). However, __sync_lock_test_and_set is not
   // a full memory barrier, but an acquire barrier. Hence, this added
@@ -253,24 +219,14 @@
 #endif // ARM
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
-                                 volatile intptr_t* dest) {
-#ifdef ARM
-  return arm_lock_test_and_set(dest, exchange_value);
-#else
-#ifdef M68K
-  return m68k_lock_test_and_set(dest, exchange_value);
-#else
-  intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T result = __sync_lock_test_and_set (dest, exchange_value);
   __sync_synchronize();
   return result;
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
-  return (void *) xchg_ptr((intptr_t) exchange_value,
-                           (volatile intptr_t*) dest);
 }
 
 // No direct support for cmpxchg of bytes; emulate using int.
@@ -305,18 +261,21 @@
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
-  os::atomic_copy64(src, &dest);
-  return dest;
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
 
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  os::atomic_copy64((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
 
 #endif // OS_CPU_BSD_ZERO_VM_ATOMIC_BSD_ZERO_HPP
--- a/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -74,6 +74,4 @@
 inline void OrderAccess::release()    { LIGHT_MEM_BARRIER; }
 inline void OrderAccess::fence()      { FULL_MEM_BARRIER;  }
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_BSD_ZERO_VM_ORDERACCESS_BSD_ZERO_INLINE_HPP
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -34,19 +34,6 @@
 #define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
 #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -57,39 +44,16 @@
   }
 };
 
-inline void Atomic::inc(volatile jint* dest)
-{
- add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec (volatile jint* dest)
-{
- add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest)
-{
- add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
-{
-  jint res = __sync_lock_test_and_set (dest, exchange_value);
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
+                                                     T volatile* dest) const {
+  STATIC_ASSERT(byte_size == sizeof(T));
+  T res = __sync_lock_test_and_set(dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
 }
 
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
-{
-  return (void *) xchg_ptr((intptr_t) exchange_value,
-                           (volatile intptr_t*) dest);
-}
-
 template<size_t byte_size>
 template<typename T>
 inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
@@ -107,26 +71,4 @@
   }
 }
 
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest)
-{
- add_ptr(-1, dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
-{
-  intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
-  FULL_MEM_BARRIER;
-  return res;
-}
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP
--- a/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -50,93 +50,28 @@
   FULL_MEM_BARRIER;
 }
 
-inline jbyte    OrderAccess::load_acquire(const volatile jbyte*   p)
-{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jshort   OrderAccess::load_acquire(const volatile jshort*  p)
-{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jint     OrderAccess::load_acquire(const volatile jint*    p)
-{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jlong    OrderAccess::load_acquire(const volatile jlong*   p)
-{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jubyte    OrderAccess::load_acquire(const volatile jubyte*   p)
-{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jushort   OrderAccess::load_acquire(const volatile jushort*  p)
-{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline juint     OrderAccess::load_acquire(const volatile juint*    p)
-{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline julong   OrderAccess::load_acquire(const volatile julong*  p)
-{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jfloat   OrderAccess::load_acquire(const volatile jfloat*  p)
-{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jdouble  OrderAccess::load_acquire(const volatile jdouble* p)
-{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t*   p)
-{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline void*    OrderAccess::load_ptr_acquire(const volatile void* p)
-{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+};
 
-inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jint*    p, jint    v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jushort* p, jushort v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile juint*   p, juint   v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile julong*  p, julong  v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v)
-{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); }
+};
 
-inline void     OrderAccess::store_fence(jbyte*   p, jbyte   v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jshort*  p, jshort  v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jint*    p, jint    v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jlong*   p, jlong   v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jubyte*  p, jubyte  v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jushort* p, jushort v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(juint*   p, juint   v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(julong*  p, julong  v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jfloat*  p, jfloat  v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_fence(jdouble* p, jdouble v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void     OrderAccess::store_ptr_fence(void**    p, void*    v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-
-inline void     OrderAccess::release_store_fence(volatile jbyte*   p, jbyte   v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jshort*  p, jshort  v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jint*    p, jint    v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jlong*   p, jlong   v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jubyte*  p, jubyte  v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { release_store(p, v); fence(); }
-inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); }
-
-inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); }
-inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { release_store_ptr(p, v); fence(); }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
+};
 
 #endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
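
Editor's note: the aarch64 file collapses the long list of per-type acquire-load and release-store accessors into size-generic templates built on GCC's __atomic_load and __atomic_store builtins. The sketch below shows, for a single 64-bit type, what those templates expand to; the function names are illustrative and are not the HotSpot OrderAccess API.

#include <cassert>
#include <cstdint>

// Acquire load: later reads on this thread cannot be reordered before it.
static int64_t load_acquire_i64(const volatile int64_t* p) {
  int64_t data;
  __atomic_load(p, &data, __ATOMIC_ACQUIRE);
  return data;
}

// Release store: earlier writes on this thread become visible before the store.
static void release_store_i64(volatile int64_t* p, int64_t v) {
  __atomic_store(p, &v, __ATOMIC_RELEASE);
}

int main() {
  volatile int64_t shared = 0;
  release_store_i64(&shared, 42);           // publish
  assert(load_acquire_i64(&shared) == 42);  // observe
  return 0;
}
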
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -44,39 +44,24 @@
  * kernel source or kernel_user_helpers.txt in Linux Doc.
  */
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load (const volatile jlong* src) {
-  assert(((intx)src & (sizeof(jlong)-1)) == 0, "Atomic load jlong mis-aligned");
-#ifdef AARCH64
-  return *src;
-#else
-  return (*os::atomic_load_long_func)(src);
-#endif
+#ifndef AARCH64
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    (*os::atomic_load_long_func)(reinterpret_cast<const volatile jlong*>(src)));
 }
 
-inline void Atomic::store (jlong value, volatile jlong* dest) {
-  assert(((intx)dest & (sizeof(jlong)-1)) == 0, "Atomic store jlong mis-aligned");
-#ifdef AARCH64
-  *dest = value;
-#else
-  (*os::atomic_store_long_func)(value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  (*os::atomic_store_long_func)(
+    PrimitiveConversions::cast<jlong>(store_value), reinterpret_cast<volatile jlong*>(dest));
+}
 #endif
-}
-
-inline void Atomic::store (jlong value, jlong* dest) {
-  store(value, (volatile jlong*)dest);
-}
 
 // As per atomic.hpp all read-modify-write operations have to provide two-way
 // barriers semantics. For AARCH64 we are using load-acquire-with-reservation and
@@ -122,14 +107,6 @@
 #endif
 }
 
-inline void Atomic::inc(volatile jint* dest) {
-  Atomic::add(1, (volatile jint *)dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  Atomic::add(-1, (volatile jint *)dest);
-}
-
 #ifdef AARCH64
 template<>
 template<typename I, typename D>
@@ -149,28 +126,15 @@
     : "memory");
   return val;
 }
-#endif // AARCH64
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  Atomic::add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  Atomic::add_ptr(-1, dest);
-}
+#endif
 
-inline void Atomic::inc_ptr(volatile void* dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
 #ifdef AARCH64
-  jint old_val;
+  T old_val;
   int tmp;
   __asm__ volatile(
     "1:\n\t"
@@ -182,13 +146,17 @@
     : "memory");
   return old_val;
 #else
-  return (*os::atomic_xchg_func)(exchange_value, dest);
+  return xchg_using_helper<jint>(os::atomic_xchg_func, exchange_value, dest);
 #endif
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
 #ifdef AARCH64
-  intptr_t old_val;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T old_val;
   int tmp;
   __asm__ volatile(
     "1:\n\t"
@@ -199,14 +167,8 @@
     : [new_val] "r" (exchange_value), [dest] "r" (dest)
     : "memory");
   return old_val;
-#else
-  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-#endif
 }
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
+#endif // AARCH64
 
 // The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
 
--- a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -33,7 +33,6 @@
 // - we define the high level barriers below and use the general
 //   implementation in orderAccess.inline.hpp, with customizations
 //   on AARCH64 via the specialized_* template functions
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
 
 // Memory Ordering on ARM is weak.
 //
@@ -131,91 +130,126 @@
 
 #ifdef AARCH64
 
-template<> inline jbyte    OrderAccess::specialized_load_acquire<jbyte>(const volatile jbyte*   p) {
-  volatile jbyte result;
-  __asm__ volatile(
-    "ldarb %w[res], [%[ptr]]"
-    : [res] "=&r" (result)
-    : [ptr] "r" (p)
-    : "memory");
-  return result;
-}
+template<>
+struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldarb %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
 
-template<> inline jshort   OrderAccess::specialized_load_acquire<jshort>(const volatile jshort*  p) {
-  volatile jshort result;
-  __asm__ volatile(
-    "ldarh %w[res], [%[ptr]]"
-    : [res] "=&r" (result)
-    : [ptr] "r" (p)
-    : "memory");
-  return result;
-}
+template<>
+struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldarh %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
 
-template<> inline jint     OrderAccess::specialized_load_acquire<jint>(const volatile jint*    p) {
-  volatile jint result;
-  __asm__ volatile(
-    "ldar %w[res], [%[ptr]]"
-    : [res] "=&r" (result)
-    : [ptr] "r" (p)
-    : "memory");
-  return result;
-}
-
-template<> inline jfloat   OrderAccess::specialized_load_acquire<jfloat>(const volatile jfloat*  p) {
-  return jfloat_cast(specialized_load_acquire((const volatile jint*)p));
-}
-
-// This is implicit as jlong and intptr_t are both "long int"
-//template<> inline jlong    OrderAccess::specialized_load_acquire(const volatile jlong*   p) {
-//  return (volatile jlong)specialized_load_acquire((const volatile intptr_t*)p);
-//}
+template<>
+struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldar %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
 
-template<> inline intptr_t OrderAccess::specialized_load_acquire<intptr_t>(const volatile intptr_t*   p) {
-  volatile intptr_t result;
-  __asm__ volatile(
-    "ldar %[res], [%[ptr]]"
-    : [res] "=&r" (result)
-    : [ptr] "r" (p)
-    : "memory");
-  return result;
-}
+template<>
+struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldar %[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
 
-template<> inline jdouble  OrderAccess::specialized_load_acquire<jdouble>(const volatile jdouble* p) {
-  return jdouble_cast(specialized_load_acquire((const volatile intptr_t*)p));
-}
-
-
-template<> inline void     OrderAccess::specialized_release_store<jbyte>(volatile jbyte*   p, jbyte   v) {
-  __asm__ volatile(
-    "stlrb %w[val], [%[ptr]]"
-    :
-    : [ptr] "r" (p), [val] "r" (v)
-    : "memory");
-}
+template<>
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlrb %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+  }
+};
 
-template<> inline void     OrderAccess::specialized_release_store<jshort>(volatile jshort*  p, jshort  v) {
-  __asm__ volatile(
-    "stlrh %w[val], [%[ptr]]"
-    :
-    : [ptr] "r" (p), [val] "r" (v)
-    : "memory");
-}
+template<>
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlrh %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+  }
+};
 
-template<> inline void     OrderAccess::specialized_release_store<jint>(volatile jint*    p, jint    v) {
-  __asm__ volatile(
-    "stlr %w[val], [%[ptr]]"
-    :
-    : [ptr] "r" (p), [val] "r" (v)
-    : "memory");
-}
+template<>
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlr %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+  }
+};
 
-template<> inline void     OrderAccess::specialized_release_store<jlong>(volatile jlong*   p, jlong   v) {
-  __asm__ volatile(
-    "stlr %[val], [%[ptr]]"
-    :
-    : [ptr] "r" (p), [val] "r" (v)
-    : "memory");
-}
+template<>
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlr %[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+  }
+};
+
 #endif // AARCH64
 
 #endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -32,22 +32,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 //
 // machine barrier instructions:
 //
@@ -146,90 +130,14 @@
   return result;
 }
 
-
-inline void Atomic::inc    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
   // Note that xchg_ptr doesn't necessarily do an acquire
   // (see synchronizer.cpp).
 
-  unsigned int old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   __asm__ __volatile__ (
@@ -257,15 +165,18 @@
       "memory"
     );
 
-  return (jint) old_value;
+  return old_value;
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
   // Note that xchg_ptr doesn't necessarily do an acquire
   // (see synchronizer.cpp).
 
-  long old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   __asm__ __volatile__ (
@@ -293,11 +204,7 @@
       "memory"
     );
 
-  return (intptr_t) old_value;
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+  return old_value;
 }
 
 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
--- a/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -80,10 +80,14 @@
 inline void   OrderAccess::release()    { inlasm_lwsync(); }
 inline void   OrderAccess::fence()      { inlasm_sync();   }
 
-template<> inline jbyte  OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte*  p) { register jbyte t = load(p);  inlasm_acquire_reg(t); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jint   OrderAccess::specialized_load_acquire<jint>  (const volatile jint*   p) { register jint t = load(p);   inlasm_acquire_reg(t); return t; }
-template<> inline jlong  OrderAccess::specialized_load_acquire<jlong> (const volatile jlong*  p) { register jlong t = load(p);  inlasm_acquire_reg(t); return t; }
+
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
+};
 
 #undef inlasm_sync
 #undef inlasm_lwsync
@@ -91,6 +95,4 @@
 #undef inlasm_isync
 #undef inlasm_acquire_reg
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP
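
For orientation, here is a minimal standalone model of the PlatformOrderedLoad shape introduced above; std::atomic_thread_fence stands in for the twi/isync acquire sequence, so treat it as a structural sketch only (the real code relies on PPC hardware ordering and inline assembly, not the C++ memory model).

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    enum ScopedFenceType { X_ACQUIRE };   // stand-in for HotSpot's enum value

    template<size_t byte_size, ScopedFenceType type>
    struct PlatformOrderedLoad {
      template<typename T>
      T operator()(const volatile T* p) const {
        T t = *p;                                             // plain load...
        std::atomic_thread_fence(std::memory_order_acquire);  // ...then acquire barrier
        return t;
      }
    };

    int main() {
      volatile int32_t flag = 42;
      return PlatformOrderedLoad<sizeof(int32_t), X_ACQUIRE>()(&flag) == 42 ? 0 : 1;
    }
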
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -53,20 +53,6 @@
 // is an integer multiple of the data length. Furthermore, all stores are ordered:
 // a store which occurs conceptually before another store becomes visible to other CPUs
 // before the other store becomes visible.
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
 
 //------------
 // Atomic::add
@@ -192,219 +178,6 @@
 }
 
 
-//------------
-// Atomic::inc
-//------------
-// These methods force the value in memory to be incremented (augmented by 1).
-// Both, memory value and increment, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::inc(volatile jint* dest) {
-  unsigned int old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-//  tty->print_cr("Atomic::inc     called... dest @%p", dest);
-    __asm__ __volatile__ (
-      "   LGHI     2,1                     \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xf8                    \n\t" // LAA minor opcode
-      "   AGHI     2,1                     \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LLGF     %[old],%[mem]           \n\t" // get old value
-      "0: LA       %[upd],1(,%[old])       \n\t" // calc result
-      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  unsigned long old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,1                     \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xe8                    \n\t" // LAA minor opcode
-      "   AGHI     2,1                     \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LG       %[old],%[mem]           \n\t" // get old value
-      "0: LA       %[upd],1(,%[old])       \n\t" // calc result
-      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-//------------
-// Atomic::dec
-//------------
-// These methods force the value in memory to be decremented (augmented by -1).
-// Both, memory value and decrement, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::dec(volatile jint* dest) {
-  unsigned int old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,-1                    \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xf8                    \n\t" // LAA minor opcode
-      "   AGHI     2,-1                    \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LLGF     %[old],%[mem]           \n\t" // get old value
-  // LAY not supported by inline assembler
-  //  "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
-      "0: LR       %[upd],%[old]           \n\t" // calc result
-      "   AHI      %[upd],-1               \n\t"
-      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  unsigned long old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,-1                    \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xe8                    \n\t" // LAA minor opcode
-      "   AGHI     2,-1                    \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LG       %[old],%[mem]           \n\t" // get old value
-//    LAY not supported by inline assembler
-//    "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
-      "0: LGR      %[upd],%[old]           \n\t" // calc result
-      "   AGHI     %[upd],-1               \n\t"
-      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 //-------------
 // Atomic::xchg
 //-------------
@@ -421,8 +194,12 @@
 //
 // The return value is the (unchanged) value from memory as it was when the
 // replacement succeeded.
-inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
-  unsigned int  old;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T old;
 
   __asm__ __volatile__ (
     "   LLGF     %[old],%[mem]           \n\t" // get old value
@@ -432,16 +209,20 @@
     : [old] "=&d" (old)      // write-only, prev value irrelevant
     , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
     //---<  inputs  >---
-    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
+    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
     //---<  clobbered  >---
     : "cc", "memory"
   );
 
-  return (jint)old;
+  return old;
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
-  unsigned long old;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T old;
 
   __asm__ __volatile__ (
     "   LG       %[old],%[mem]           \n\t" // get old value
@@ -451,16 +232,12 @@
     : [old] "=&d" (old)      // write-only, init from memory
     , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
     //---<  inputs  >---
-    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
+    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
     //---<  clobbered  >---
     : "cc", "memory"
   );
 
-  return (intptr_t)old;
-}
-
-inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+  return old;
 }
 
 //----------------
@@ -544,6 +321,4 @@
   return old;
 }
 
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
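
The deleted inc/dec bodies are not lost functionality: callers now go through Atomic::add(1, dest) and Atomic::add(-1, dest), which on this platform comes down to the same compare-and-swap retry loop as the CS/CSG fallback paths above. A hedged portable sketch of that loop, written with GCC __atomic builtins rather than s390 instructions:

    #include <cstdint>
    #include <iostream>

    template<typename T>
    T add_with_cas_loop(T add_value, T volatile* dest) {
      T old_value = __atomic_load_n(dest, __ATOMIC_RELAXED);
      for (;;) {
        T new_value = old_value + add_value;
        // On failure, old_value is refreshed with the current contents of *dest.
        if (__atomic_compare_exchange_n(dest, &old_value, new_value,
                                        /*weak=*/false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
          return new_value;               // Atomic::add returns the updated value
        }
      }
    }

    int main() {
      int32_t counter = 41;
      std::cout << add_with_cas_loop<int32_t>(1, &counter) << '\n';   // 42 ("inc")
      std::cout << add_with_cas_loop<int32_t>(-1, &counter) << '\n';  // 41 ("dec")
    }
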
--- a/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -74,10 +74,13 @@
 inline void OrderAccess::release()    { inlasm_zarch_release(); }
 inline void OrderAccess::fence()      { inlasm_zarch_sync(); }
 
-template<> inline jbyte  OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte*  p) { register jbyte  t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jint   OrderAccess::specialized_load_acquire<jint>  (const volatile jint*   p) { register jint   t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jlong  OrderAccess::specialized_load_acquire<jlong> (const volatile jlong*  p) { register jlong  t = *p; inlasm_zarch_acquire(); return t; }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { register T t = *p; inlasm_zarch_acquire(); return t; }
+};
 
 #undef inlasm_compiler_barrier
 #undef inlasm_zarch_sync
@@ -85,8 +88,4 @@
 #undef inlasm_zarch_acquire
 #undef inlasm_zarch_fence
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
-
-
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -448,11 +448,17 @@
     }
 
     else { // thread->thread_state() != _thread_in_Java
-      if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
-        // SIGILL must be caused by VM_Version::determine_features().
+      if ((sig == SIGILL) && VM_Version::is_determine_features_test_running()) {
+        // SIGILL must be caused by VM_Version::determine_features()
+        // when attempting to execute a non-existing instruction.
         //*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
                              // Flushing of icache is not necessary.
         stub = pc; // Continue with next instruction.
+      } else if ((sig == SIGFPE) && VM_Version::is_determine_features_test_running()) {
+        // SIGFPE is known to be caused by trying to execute a vector instruction
+        // when the vector facility is installed, but operating system support is missing.
+        VM_Version::reset_has_VectorFacility();
+        stub = pc; // Continue with next instruction.
       } else if (thread->thread_state() == _thread_in_vm &&
                  sig == SIGBUS && thread->doing_unsafe_access()) {
        // We don't really need a stub here! Just set the pending exception and
@@ -471,7 +477,7 @@
     // Info->si_addr need not be the exact address, it is only
     // guaranteed to be on the same page as the address that caused
     // the SIGSEGV.
-    if ((sig == SIGSEGV) &&
+    if ((sig == SIGSEGV) && !UseMembar &&
         (os::get_memory_serialize_page() ==
          (address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
       return true;
@@ -510,7 +516,7 @@
   // Note: this should be combined with the trap_pc handling above,
   // because it handles the same issue.
   if (sig == SIGILL || sig == SIGFPE) {
-    pc = (address) info->si_addr;
+    pc = (address)info->si_addr;
   }
 
   VMError::report_and_die(t, sig, pc, info, ucVoid);
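
The added !UseMembar test only matters when the memory-serialize-page scheme is in use; the comparison itself is plain address masking, rounding the fault address down to its page base. A tiny self-contained example of that arithmetic, with a made-up page-aligned address and an assumed 4 KiB page size:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uintptr_t page_size      = 4096;        // assumed; real code asks the OS
      const uintptr_t serialize_page = 0x20002000;  // made-up, page-aligned
      uintptr_t fault_addr           = serialize_page + 123;  // somewhere inside that page

      bool on_serialize_page = ((fault_addr & ~(page_size - 1)) == serialize_page);
      std::cout << std::boolalpha << on_serialize_page << '\n';  // true
    }
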
--- a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -27,30 +27,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -103,9 +79,12 @@
   return rv;
 }
 
-
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
-  intptr_t rv = exchange_value;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T rv = exchange_value;
   __asm__ volatile(
     " swap   [%2],%1\n\t"
     : "=r" (rv)
@@ -114,8 +93,12 @@
   return rv;
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  intptr_t rv = exchange_value;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T rv = exchange_value;
   __asm__ volatile(
     "1:\n\t"
     " mov    %1, %%o3\n\t"
@@ -131,10 +114,6 @@
   return rv;
 }
 
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
 // No direct support for cmpxchg of bytes; emulate using int.
 template<>
 struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
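
PlatformCmpxchg<1> is mapped onto CmpxchgByteUsingInt because the hardware has no byte-sized compare-and-swap. A hedged sketch of that emulation idea (my own code, not HotSpot's helper): CAS the aligned 32-bit word containing the byte and retry when a neighbouring byte changed underneath. Byte-lane arithmetic is written for a little-endian machine only; the real helper also handles big-endian layouts.

    #include <cstdint>
    #include <iostream>

    uint8_t cmpxchg_byte_using_int(uint8_t exchange_value,
                                   volatile uint8_t* dest,
                                   uint8_t compare_value) {
      // Aligned word containing *dest, and the byte's lane within that word.
      uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
      volatile uint32_t* aligned =
          reinterpret_cast<volatile uint32_t*>(addr & ~uintptr_t(3));
      unsigned shift = 8 * (addr & 3);              // little-endian lane offset
      uint32_t mask  = uint32_t(0xff) << shift;

      uint32_t cur = __atomic_load_n(aligned, __ATOMIC_RELAXED);
      for (;;) {
        uint8_t cur_byte = uint8_t(cur >> shift);
        if (cur_byte != compare_value) return cur_byte;   // would fail anyway
        uint32_t new_word = (cur & ~mask) | (uint32_t(exchange_value) << shift);
        if (__atomic_compare_exchange_n(aligned, &cur, new_word, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
          return compare_value;                           // old byte matched
        }
        // cur now holds the refreshed word; retry (a neighbouring byte may have changed).
      }
    }

    int main() {
      alignas(4) uint8_t bytes[4] = {1, 2, 3, 4};
      uint8_t prev = cmpxchg_byte_using_int(9, &bytes[2], 3);
      std::cout << int(prev) << ' ' << int(bytes[2]) << '\n';  // "3 9"
    }
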
--- a/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,4 @@
   __asm__ volatile ("membar  #StoreLoad" : : : "memory");
 }
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_LINUX_SPARC_VM_ORDERACCESS_LINUX_SPARC_INLINE_HPP
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -27,19 +27,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@@ -61,25 +48,11 @@
   return old_value;
 }
 
-inline void Atomic::inc    (volatile jint*     dest) {
-  __asm__ volatile (  "lock addl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  __asm__ volatile (  "lock subl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "xchgl (%2),%0"
                     : "=r" (exchange_value)
                     : "0" (exchange_value), "r" (dest)
@@ -87,10 +60,6 @@
   return exchange_value;
 }
 
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
 template<>
 template<typename T>
 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -120,8 +89,6 @@
 }
 
 #ifdef AMD64
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
 
 template<>
 template<typename I, typename D>
@@ -136,21 +103,11 @@
   return old_value;
 }
 
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ ("lock addq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ ("lock subq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("xchgq (%2),%0"
                         : "=r" (exchange_value)
                         : "0" (exchange_value), "r" (dest)
@@ -172,22 +129,8 @@
   return exchange_value;
 }
 
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #else // !AMD64
 
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
 extern "C" {
   // defined in linux_x86.s
   jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong);
@@ -204,18 +147,21 @@
   return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
 }
 
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
-  _Atomic_move_long(src, &dest);
-  return dest;
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
 
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
 
 #endif // AMD64
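
The new 32-bit PlatformLoad<8>/PlatformStore<8> specializations route any 8-byte type through the jlong-based _Atomic_move_long helper, with PrimitiveConversions::cast doing a bit-preserving conversion at that boundary. A small standalone sketch of that kind of cast (a hand-rolled bit_cast, not the HotSpot class):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    template<typename To, typename From>
    To bit_cast(From from) {
      static_assert(sizeof(To) == sizeof(From), "same-size types only");
      To to;
      std::memcpy(&to, &from, sizeof(To));  // reinterpret the bits, no value conversion
      return to;
    }

    int main() {
      double d = 1.5;
      int64_t bits = bit_cast<int64_t>(d);   // what a jlong-typed helper would see
      double back  = bit_cast<double>(bits); // what the caller gets handed back
      std::cout << std::hex << bits << ' ' << back << '\n';  // 3ff8000000000000 1.5
    }
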
--- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,46 +60,57 @@
 }
 
 template<>
-inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte*  p, jbyte  v) {
-  __asm__ volatile (  "xchgb (%2),%0"
-                    : "=q" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgb (%2),%0"
+                      : "=q" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
 template<>
-inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
-  __asm__ volatile (  "xchgw (%2),%0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgw (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
 template<>
-inline void OrderAccess::specialized_release_store_fence<jint>  (volatile jint*   p, jint   v) {
-  __asm__ volatile (  "xchgl (%2),%0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgl (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
 
 #ifdef AMD64
 template<>
-inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong*  p, jlong  v) {
-  __asm__ volatile (  "xchgq (%2), %0"
-                    : "=r" (v)
-                    : "0" (v), "r" (p)
-                    : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgq (%2), %0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
 #endif // AMD64
 
-template<>
-inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat*  p, jfloat  v) {
-  release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
-  release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
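
The PlatformOrderedStore functors above lean on the fact that an implicitly locked x86 xchg is both the store and a full barrier, so release_store_fence() costs a single instruction. A minimal portable stand-in for that idea, assuming the GCC/Clang __atomic_exchange_n builtin in place of the inline assembly:

    #include <cstdint>

    template<typename T>
    void release_store_fence(volatile T* p, T v) {
      // A sequentially consistent exchange stores v and orders like a full fence;
      // the previous value is simply discarded.
      (void)__atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
    }

    int main() {
      volatile int32_t ready = 0;
      release_store_fence(&ready, int32_t(1));
      return ready == 1 ? 0 : 1;
    }

This also explains why the explicit jfloat/jdouble specializations could be dropped: the functor is selected by operand size, so a 4-byte float and a 4-byte int share the same code path.
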
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -87,7 +87,7 @@
 
 /* Atomically write VALUE into `*PTR' and returns the previous
    contents of `*PTR'.  */
-static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
   for (;;) {
       // Loop until success.
       int prev = *ptr;
@@ -148,7 +148,7 @@
 
 /* Atomically write VALUE into `*PTR' and returns the previous
    contents of `*PTR'.  */
-static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
   for (;;) {
       // Loop until a __kernel_cmpxchg succeeds.
       int prev = *ptr;
@@ -159,14 +159,6 @@
 }
 #endif // ARM
 
-inline void Atomic::store(jint store_value, volatile jint* dest) {
-  *dest = store_value;
-}
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
-  *dest = store_value;
-}
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -201,42 +193,22 @@
   return __sync_add_and_fetch(dest, add_value);
 }
 
-inline void Atomic::inc(volatile jint* dest) {
-  add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
 #ifdef ARM
-  return arm_lock_test_and_set(dest, exchange_value);
+  return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
 #else
 #ifdef M68K
-  return m68k_lock_test_and_set(dest, exchange_value);
+  return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
 #else
   // __sync_lock_test_and_set is a bizarrely named atomic exchange
   // operation.  Note that some platforms only support this with the
   // limitation that the only valid value to store is the immediate
   // constant 1.  There is a test for this in JNI_CreateJavaVM().
-  jint result = __sync_lock_test_and_set (dest, exchange_value);
+  T result = __sync_lock_test_and_set (dest, exchange_value);
   // All atomic operations are expected to be full memory barriers
   // (see atomic.hpp). However, __sync_lock_test_and_set is not
   // a full memory barrier, but an acquire barrier. Hence, this added
@@ -247,24 +219,14 @@
 #endif // ARM
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
-                                 volatile intptr_t* dest) {
-#ifdef ARM
-  return arm_lock_test_and_set(dest, exchange_value);
-#else
-#ifdef M68K
-  return m68k_lock_test_and_set(dest, exchange_value);
-#else
-  intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T result = __sync_lock_test_and_set (dest, exchange_value);
   __sync_synchronize();
   return result;
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
-  return (void *) xchg_ptr((intptr_t) exchange_value,
-                           (volatile intptr_t*) dest);
 }
 
 // No direct support for cmpxchg of bytes; emulate using int.
@@ -299,18 +261,21 @@
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
-  os::atomic_copy64(src, &dest);
-  return dest;
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
 
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  os::atomic_copy64((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
 
 #endif // OS_CPU_LINUX_ZERO_VM_ATOMIC_LINUX_ZERO_HPP
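
The arm/m68k lock_test_and_set helpers have their parameters swapped to (new value, destination) so they can be handed to xchg_using_helper, which adapts a fixed-type C-style helper to the templated T. A hedged sketch of such an adapter; the helper name and exact template signature here are illustrative, not HotSpot's:

    #include <cstdint>
    #include <iostream>

    // Stand-in "platform helper" with the fixed (value, dest) calling convention.
    static int32_t fake_helper(int32_t exchange_value, volatile int32_t* dest) {
      int32_t prev = *dest;   // real helpers do this atomically
      *dest = exchange_value;
      return prev;
    }

    template<typename StubType, typename StubFn, typename T>
    T xchg_using_helper(StubFn fn, T exchange_value, T volatile* dest) {
      static_assert(sizeof(StubType) == sizeof(T), "helper and T must match in size");
      return static_cast<T>(fn(static_cast<StubType>(exchange_value),
                               reinterpret_cast<StubType volatile*>(dest)));
    }

    int main() {
      volatile int32_t v = 5;
      int32_t prev = xchg_using_helper<int32_t>(fake_helper, int32_t(7), &v);
      std::cout << prev << ' ' << v << '\n';  // "5 7"
    }
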
--- a/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -56,8 +56,16 @@
 
 #else // PPC
 
+#ifdef ALPHA
+
+#define LIGHT_MEM_BARRIER __sync_synchronize()
+
+#else // ALPHA
+
 #define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")
 
+#endif // ALPHA
+
 #endif // PPC
 
 #endif // ARM
@@ -75,6 +83,4 @@
 
 inline void OrderAccess::fence()      { FULL_MEM_BARRIER;  }
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_LINUX_ZERO_VM_ORDERACCESS_LINUX_ZERO_INLINE_HPP
--- a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -27,41 +27,6 @@
 
 // Implementation of class atomic
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
-
-inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
-
-// This is the interface to the atomic instructions in solaris_sparc.il.
-// It's very messy because we need to support v8 and these instructions
-// are illegal there.  When sparc v8 is dropped, we can drop out lots of
-// this code.  Also compiler2 does not support v8 so the conditional code
-// omits the instruction set check.
-
-extern "C" jint     _Atomic_swap32(jint     exchange_value, volatile jint*     dest);
-extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);
-
 // Implement ADD using a CAS loop.
 template<size_t byte_size>
 struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
@@ -78,16 +43,30 @@
   }
 };
 
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
-  return _Atomic_swap32(exchange_value, dest);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  __asm__ volatile (  "swap [%2],%0"
+                    : "=r" (exchange_value)
+                    : "0" (exchange_value), "r" (dest)
+                    : "memory");
+  return exchange_value;
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return _Atomic_swap64(exchange_value, dest);
-}
-
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T old_value = *dest;
+  while (true) {
+    T result = cmpxchg(exchange_value, dest, old_value);
+    if (result == old_value) break;
+    old_value = result;
+  }
+  return old_value;
 }
 
 // No direct support for cmpxchg of bytes; emulate using int.
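
The new PlatformXchg<8> above builds exchange out of cmpxchg in plain C++. Here is the same loop rewritten against std::atomic, as a sketch whose termination property is easy to check in isolation: the loop exits as soon as compare-exchange observes the value it last read, and that value is what gets returned.

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    template<typename T>
    T xchg_via_cas(std::atomic<T>& dest, T exchange_value) {
      T old_value = dest.load();
      while (!dest.compare_exchange_weak(old_value, exchange_value)) {
        // On failure old_value is reloaded with the current contents of dest,
        // mirroring "old_value = result" in the loop above.
      }
      return old_value;   // the value that was replaced
    }

    int main() {
      std::atomic<int64_t> v(100);
      std::cout << xchg_via_cas<int64_t>(v, 200) << ' ' << v.load() << '\n';  // "100 200"
    }
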
--- a/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,6 +52,4 @@
   __asm__ volatile ("membar  #StoreLoad" : : : "memory");
 }
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_SOLARIS_SPARC_VM_ORDERACCESS_SOLARIS_SPARC_INLINE_HPP
--- a/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il	Tue Oct 10 16:29:04 2017 +0200
@@ -32,47 +32,6 @@
        .end
 
 
-  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
-  //
-  // Arguments:
-  //      exchange_value: O0
-  //      dest:           O1
-  //
-  // Results:
-  //     O0: the value previously stored in dest
-
-        .inline _Atomic_swap32, 2
-        .volatile
-        swap    [%o1],%o0
-        .nonvolatile
-        .end
-
-
-  // Support for intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t * dest).
-  //
-  // 64-bit
-  //
-  // Arguments:
-  //      exchange_value: O0
-  //      dest:           O1
-  //
-  // Results:
-  //     O0: the value previously stored in dest
-
-        .inline _Atomic_swap64, 2
-        .volatile
-    1:
-        mov     %o0, %o3
-        ldx     [%o1], %o2
-        casx    [%o1], %o2, %o3
-        cmp     %o2, %o3
-        bne     %xcc, 1b
-         nop
-        mov     %o2, %o0
-        .nonvolatile
-        .end
-
-
   // Support for jlong Atomic::load and Atomic::store on v9.
   //
   // void _Atomic_move_long_v9(volatile jlong* src, volatile jlong* dst)
--- a/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -380,7 +380,7 @@
   if (av & AV_SPARC_CRC32C)       features |= ISA_crc32c_msk;
 
 #ifndef AV2_SPARC_FJATHPLUS
-#define AV2_SPARC_FJATHPLUS  0x00000001 // Fujitsu Athena+
+#define AV2_SPARC_FJATHPLUS  0x00000001 // Fujitsu Athena+ insns
 #endif
 #ifndef AV2_SPARC_VIS3B
 #define AV2_SPARC_VIS3B      0x00000002 // VIS3 present on multiple chips
@@ -407,6 +407,34 @@
 #define AV2_SPARC_VAMASK     0x00000100 // Virtual Address masking
 #endif
 
+#ifndef AV2_SPARC_SPARC6
+#define AV2_SPARC_SPARC6     0x00000200 // REVB*, FPSLL*, RDENTROPY, LDM* and STM*
+#endif
+#ifndef AV2_SPARC_DICTUNP
+#define AV2_SPARC_DICTUNP    0x00002000 // Dictionary unpack instruction
+#endif
+#ifndef AV2_SPARC_FPCMPSHL
+#define AV2_SPARC_FPCMPSHL   0x00004000 // Partition compare with shifted result
+#endif
+#ifndef AV2_SPARC_RLE
+#define AV2_SPARC_RLE        0x00008000 // Run-length encoded burst and length
+#endif
+#ifndef AV2_SPARC_SHA3
+#define AV2_SPARC_SHA3       0x00010000 // SHA3 instructions
+#endif
+#ifndef AV2_SPARC_FJATHPLUS2
+#define AV2_SPARC_FJATHPLUS2 0x00020000 // Fujitsu Athena++ insns
+#endif
+#ifndef AV2_SPARC_VIS3C
+#define AV2_SPARC_VIS3C      0x00040000 // Subset of VIS3 insns provided by Athena++
+#endif
+#ifndef AV2_SPARC_SPARC5B
+#define AV2_SPARC_SPARC5B    0x00080000 // subset of SPARC5 insns (fpadd8, fpsub8)
+#endif
+#ifndef AV2_SPARC_MME
+#define AV2_SPARC_MME        0x00100000 // Misaligned Mitigation Enable
+#endif
+
   if (avn > 1) {
     uint32_t av2 = avs[AV_HW2_IDX];
 
@@ -419,19 +447,30 @@
     if (av2 & AV2_SPARC_XMONT)      features |= ISA_xmont_msk;
     if (av2 & AV2_SPARC_PAUSE_NSEC) features |= ISA_pause_nsec_msk;
     if (av2 & AV2_SPARC_VAMASK)     features |= ISA_vamask_msk;
+
+    if (av2 & AV2_SPARC_SPARC6)     features |= ISA_sparc6_msk;
+    if (av2 & AV2_SPARC_DICTUNP)    features |= ISA_dictunp_msk;
+    if (av2 & AV2_SPARC_FPCMPSHL)   features |= ISA_fpcmpshl_msk;
+    if (av2 & AV2_SPARC_RLE)        features |= ISA_rle_msk;
+    if (av2 & AV2_SPARC_SHA3)       features |= ISA_sha3_msk;
+    if (av2 & AV2_SPARC_FJATHPLUS2) features |= ISA_fjathplus2_msk;
+    if (av2 & AV2_SPARC_VIS3C)      features |= ISA_vis3c_msk;
+    if (av2 & AV2_SPARC_SPARC5B)    features |= ISA_sparc5b_msk;
+    if (av2 & AV2_SPARC_MME)        features |= ISA_mme_msk;
   }
 
   _features = features;     // ISA feature set completed, update state.
 
   Sysinfo machine(SI_MACHINE);
 
-  bool is_sun4v = machine.match("sun4v");   // All Oracle SPARC + Fujitsu Athena+
+  bool is_sun4v = machine.match("sun4v");   // All Oracle SPARC + Fujitsu Athena+/++
   bool is_sun4u = machine.match("sun4u");   // All other Fujitsu
 
-  // Handle Athena+ conservatively (simply because we are lacking info.).
+  // Handle Athena+/++ conservatively (simply because we are lacking info.).
 
-  bool do_sun4v = is_sun4v && !has_athena_plus();
-  bool do_sun4u = is_sun4u ||  has_athena_plus();
+  bool an_athena = has_athena_plus() || has_athena_plus2();
+  bool do_sun4v  = is_sun4v && !an_athena;
+  bool do_sun4u  = is_sun4u ||  an_athena;
 
   uint64_t synthetic = 0;
 
@@ -441,16 +480,16 @@
     // Fast IDIV, BIS and LD available on Niagara Plus.
     if (has_vis2()) {
       synthetic |= (CPU_fast_idiv_msk | CPU_fast_ld_msk);
-      // ...on Core S4 however, we prefer not to use BIS.
+      // ...on Core C4 however, we prefer not to use BIS.
       if (!has_sparc5()) {
         synthetic |= CPU_fast_bis_msk;
       }
     }
-    // Niagara Core S3 supports fast RDPC and block zeroing.
+    // SPARC Core C3 supports fast RDPC and block zeroing.
     if (has_ima()) {
       synthetic |= (CPU_fast_rdpc_msk | CPU_blk_zeroing_msk);
     }
-    // Niagara Core S3 and S4 have slow CMOVE.
+    // SPARC Core C3 and C4 have slow CMOVE.
     if (!has_ima()) {
       synthetic |= CPU_fast_cmove_msk;
     }
--- a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -25,28 +25,6 @@
 #ifndef OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
 #define OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
 // For Sun Studio - implementation is in solaris_x86_64.il.
 
 extern "C" {
@@ -92,8 +70,26 @@
                      reinterpret_cast<jlong volatile*>(dest)));
 }
 
-inline jint     Atomic::xchg       (jint     exchange_value, volatile jint*     dest) {
-  return _Atomic_xchg(exchange_value, dest);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    _Atomic_xchg(PrimitiveConversions::cast<jint>(exchange_value),
+                 reinterpret_cast<jint volatile*>(dest)));
+}
+
+extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
+
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    _Atomic_xchg_long(PrimitiveConversions::cast<jlong>(exchange_value),
+                      reinterpret_cast<jlong volatile*>(dest)));
 }
 
 // Not using cmpxchg_using_helper here, because some configurations of
@@ -141,18 +137,4 @@
                          PrimitiveConversions::cast<jlong>(compare_value)));
 }
 
-inline void Atomic::store    (jlong    store_value, jlong*             dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
-}
-
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
-}
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #endif // OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
--- a/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,6 +58,4 @@
   compiler_barrier();
 }
 
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
 #endif // OS_CPU_SOLARIS_X86_VM_ORDERACCESS_SOLARIS_X86_INLINE_HPP
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -42,21 +42,6 @@
 
 #pragma warning(disable: 4035) // Disables warnings reporting missing return statement
 
-inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
-inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
-inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
-
-
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd
   : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -66,9 +51,6 @@
 };
 
 #ifdef AMD64
-inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
-inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
-
 template<>
 template<typename I, typename D>
 inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
@@ -81,41 +63,19 @@
   return add_using_helper<intptr_t>(os::atomic_add_ptr_func, add_value, dest);
 }
 
-inline void Atomic::inc    (volatile jint*     dest) {
-  (void)add    (1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  (void)add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  (void)add_ptr(1, dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  (void)add    (-1, dest);
-}
+#define DEFINE_STUB_XCHG(ByteSize, StubType, StubName)                  \
+  template<>                                                            \
+  template<typename T>                                                  \
+  inline T Atomic::PlatformXchg<ByteSize>::operator()(T exchange_value, \
+                                                      T volatile* dest) const { \
+    STATIC_ASSERT(ByteSize == sizeof(T));                               \
+    return xchg_using_helper<StubType>(StubName, exchange_value, dest); \
+  }
 
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  (void)add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  (void)add_ptr(-1, dest);
-}
+DEFINE_STUB_XCHG(4, jint, os::atomic_xchg_func)
+DEFINE_STUB_XCHG(8, jlong, os::atomic_xchg_ptr_func)
 
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
-  return (jint)(*os::atomic_xchg_func)(exchange_value, dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)(os::atomic_xchg_ptr_func)(exchange_value, dest);
-}
-
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void *)(os::atomic_xchg_ptr_func)((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
+#undef DEFINE_STUB_XCHG
 
 #define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)               \
   template<>                                                            \
@@ -134,8 +94,6 @@
 
 #undef DEFINE_STUB_CMPXCHG
 
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
 #else // !AMD64
 
 template<>
@@ -152,39 +110,11 @@
   }
 }
 
-inline void Atomic::inc    (volatile jint*     dest) {
-  // alternative for InterlockedIncrement
-  __asm {
-    mov edx, dest;
-    lock add dword ptr [edx], 1;
-  }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  // alternative for InterlockedDecrement
-  __asm {
-    mov edx, dest;
-    lock sub dword ptr [edx], 1;
-  }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec((volatile jint*)dest);
-}
-
-inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(4 == sizeof(T));
   // alternative for InterlockedExchange
   __asm {
     mov eax, exchange_value;
@@ -193,14 +123,6 @@
   }
 }
 
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
 template<>
 template<typename T>
 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -258,9 +180,12 @@
   }
 }
 
-inline jlong Atomic::load(const volatile jlong* src) {
-  volatile jlong dest;
-  volatile jlong* pdest = &dest;
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  volatile T dest;
+  volatile T* pdest = &dest;
   __asm {
     mov eax, src
     fild     qword ptr [eax]
@@ -270,8 +195,12 @@
   return dest;
 }
 
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  volatile jlong* src = &store_value;
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  volatile T* src = &store_value;
   __asm {
     mov eax, src
     fild     qword ptr [eax]
@@ -280,10 +209,6 @@
   }
 }
 
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  Atomic::store(store_value, (volatile jlong*)dest);
-}
-
 #endif // AMD64
 
 #pragma warning(default: 4035) // Enables warnings reporting missing return statement
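
Note on the hunks above: the hand-written per-type Atomic overloads (store, inc/dec, xchg) are replaced by size-keyed functor specializations such as PlatformXchg<4>/PlatformXchg<8>, PlatformLoad<8> and PlatformStore<8>, selected by operand size (the STATIC_ASSERT ties ByteSize to sizeof(T)). A minimal, self-contained sketch of that dispatch pattern, with illustrative names and a non-atomic stand-in body rather than HotSpot's real stubs:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Sketch of size-keyed dispatch; only the needed specializations exist.
    template<std::size_t byte_size>
    struct SizedXchg;

    template<>
    struct SizedXchg<4> {
      template<typename T>
      T operator()(T exchange_value, T volatile* dest) const {
        static_assert(sizeof(T) == 4, "size mismatch");
        T old = *dest;                 // the real code calls an os::atomic_xchg stub
        *dest = exchange_value;
        return old;
      }
    };

    template<typename T>
    T generic_xchg(T exchange_value, T volatile* dest) {
      // One front end for all 4-byte types; no jint/intptr_t-specific overloads.
      return SizedXchg<sizeof(T)>()(exchange_value, dest);
    }

    int main() {
      volatile int32_t flag = 1;
      int32_t old = generic_xchg(int32_t(2), &flag);
      std::printf("old=%d new=%d\n", (int)old, (int)flag);
      return 0;
    }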
--- a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,42 +74,46 @@
 
 #ifndef AMD64
 template<>
-inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte*  p, jbyte  v) {
-  __asm {
-    mov edx, p;
-    mov al, v;
-    xchg al, byte ptr [edx];
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm {
+      mov edx, p;
+      mov al, v;
+      xchg al, byte ptr [edx];
+    }
   }
-}
-
-template<>
-inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
-  __asm {
-    mov edx, p;
-    mov ax, v;
-    xchg ax, word ptr [edx];
-  }
-}
+};
 
 template<>
-inline void OrderAccess::specialized_release_store_fence<jint>  (volatile jint*   p, jint   v) {
-  __asm {
-    mov edx, p;
-    mov eax, v;
-    xchg eax, dword ptr [edx];
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm {
+      mov edx, p;
+      mov ax, v;
+      xchg ax, word ptr [edx];
+    }
   }
-}
-#endif // AMD64
+};
 
 template<>
-inline void OrderAccess::specialized_release_store_fence<jfloat>(volatile jfloat*  p, jfloat  v) {
-    release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
-    release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm {
+      mov edx, p;
+      mov eax, v;
+      xchg eax, dword ptr [edx];
+    }
+  }
+};
+#endif // AMD64
 
 #endif // OS_CPU_WINDOWS_X86_VM_ORDERACCESS_WINDOWS_X86_INLINE_HPP
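
The OrderAccess hunk applies the same pattern: the jbyte/jshort/jint specializations of specialized_release_store_fence become PlatformOrderedStore<1/2/4, RELEASE_X_FENCE> functors, and the jfloat/jdouble forwarding specializations can go away, presumably because the shared layer now dispatches by size. A rough standalone analogue of the idea (assumed simplified semantics, not the real barrier implementation):

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    // Size-keyed ordered-store functor, analogous in shape to
    // PlatformOrderedStore<byte_size, RELEASE_X_FENCE>; names illustrative.
    template<std::size_t byte_size>
    struct OrderedStoreFence;

    template<>
    struct OrderedStoreFence<4> {
      template<typename T>
      void operator()(T v, volatile T* p) const {
        static_assert(sizeof(T) == 4, "size mismatch");
        std::atomic_thread_fence(std::memory_order_release);
        *p = v;                                   // real code uses xchg for store+fence
        std::atomic_thread_fence(std::memory_order_seq_cst);
      }
    };

    template<typename T>
    void release_store_fence(volatile T* p, T v) {
      OrderedStoreFence<sizeof(T)>()(v, p);       // float shares the 4-byte path
    }

    int main() {
      volatile float f = 0.0f;
      release_store_fence(&f, 1.5f);              // no jint_cast at the call site
      std::printf("%f\n", (double)f);
      return 0;
    }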
--- a/src/hotspot/share/adlc/output_c.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/adlc/output_c.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -2276,6 +2276,10 @@
     if (strcmp(rep_var,"$XMMRegister") == 0)   return "as_XMMRegister";
 #endif
     if (strcmp(rep_var,"$CondRegister") == 0)  return "as_ConditionRegister";
+#if defined(PPC64)
+    if (strcmp(rep_var,"$VectorRegister") == 0)   return "as_VectorRegister";
+    if (strcmp(rep_var,"$VectorSRegister") == 0)  return "as_VectorSRegister";
+#endif
     return NULL;
   }
 
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1304,7 +1304,9 @@
   // FIXME T_ADDRESS should actually be T_METADATA but it can't because the
   // meaning of these two is mixed up (see JDK-8026837).
   __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_ADDRESS), temp, info);
-  __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
+  __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_ADDRESS), result);
+  // mirror = ((OopHandle)mirror)->resolve();
+  __ move_wide(new LIR_Address(result, T_OBJECT), result);
 }
 
 // java.lang.Class::isPrimitive()
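
The extra move_wide above reflects that the klass's java_mirror field is now read through an OopHandle: the first load fetches the handle (the address of a slot holding the oop), the second resolves it to the mirror itself. A toy standalone analogue of the indirection, with illustrative types rather than HotSpot's:

    #include <cstdio>

    struct MirrorLike { const char* name; };      // stand-in for the java.lang.Class oop

    struct OopHandleLike {
      MirrorLike** _obj;                          // handle points at a slot holding the oop
      MirrorLike*  resolve() const { return *_obj; }
    };

    struct KlassLike {
      OopHandleLike _java_mirror;                 // previously a direct oop field
    };

    int main() {
      MirrorLike m = { "java/lang/String" };
      MirrorLike* slot = &m;
      KlassLike k;
      k._java_mirror._obj = &slot;

      // load #1: the handle (T_ADDRESS); load #2: the oop it refers to (T_OBJECT)
      MirrorLike* mirror = k._java_mirror.resolve();
      std::printf("%s\n", mirror->name);
      return 0;
    }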
--- a/src/hotspot/share/ci/ciInstanceKlass.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/ci/ciInstanceKlass.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -665,9 +665,8 @@
             _out->print_cr("null");
           } else if (value->is_instance()) {
             if (value->is_a(SystemDictionary::String_klass())) {
-              _out->print("\"");
-              _out->print_raw(java_lang_String::as_quoted_ascii(value));
-              _out->print_cr("\"");
+              const char* ascii_value = java_lang_String::as_quoted_ascii(value);
+              _out->print("\"%s\"", (ascii_value != NULL) ? ascii_value : "");
             } else {
               const char* klass_name  = value->klass()->name()->as_quoted_ascii();
               _out->print_cr("%s", klass_name);
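
The string-printing rewrite above matters because as_quoted_ascii() can return NULL; printing through one format call with a guarded argument keeps NULL out of the raw print path. The same pattern, reduced to a standalone sketch:

    #include <cstdio>

    // NULL-safe quoted print, mirroring the guarded-argument pattern above.
    static void print_quoted(const char* s) {
      std::printf("\"%s\"\n", (s != nullptr) ? s : "");
    }

    int main() {
      print_quoted("CONST");
      print_quoted(nullptr);   // previously a NULL here reached the raw print path
      return 0;
    }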
--- a/src/hotspot/share/classfile/classLoader.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/classLoader.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -802,7 +802,6 @@
           if (DumpSharedSpaces) {
             JImageFile *jimage = _jrt_entry->jimage();
             assert(jimage != NULL, "No java runtime image file present");
-            ClassLoader::initialize_module_loader_map(jimage);
           }
 #endif
         }
@@ -1144,61 +1143,6 @@
   return (*Crc32)(crc, (const jbyte*)buf, len);
 }
 
-#if INCLUDE_CDS
-void ClassLoader::initialize_module_loader_map(JImageFile* jimage) {
-  if (!DumpSharedSpaces) {
-    return; // only needed for CDS dump time
-  }
-
-  ResourceMark rm;
-  jlong size;
-  JImageLocationRef location = (*JImageFindResource)(jimage, JAVA_BASE_NAME, get_jimage_version_string(), MODULE_LOADER_MAP, &size);
-  if (location == 0) {
-    vm_exit_during_initialization(
-      "Cannot find ModuleLoaderMap location from modules jimage.", NULL);
-  }
-  char* buffer = NEW_RESOURCE_ARRAY(char, size + 1);
-  buffer[size] = '\0';
-  jlong read = (*JImageGetResource)(jimage, location, buffer, size);
-  if (read != size) {
-    vm_exit_during_initialization(
-      "Cannot find ModuleLoaderMap resource from modules jimage.", NULL);
-  }
-  char* char_buf = (char*)buffer;
-  int buflen = (int)strlen(char_buf);
-  char* begin_ptr = char_buf;
-  char* end_ptr = strchr(begin_ptr, '\n');
-  bool process_boot_modules = false;
-  _boot_modules_array = new (ResourceObj::C_HEAP, mtModule)
-    GrowableArray<char*>(INITIAL_BOOT_MODULES_ARRAY_SIZE, true);
-  _platform_modules_array = new (ResourceObj::C_HEAP, mtModule)
-    GrowableArray<char*>(INITIAL_PLATFORM_MODULES_ARRAY_SIZE, true);
-  while (end_ptr != NULL && (end_ptr - char_buf) < buflen) {
-    // Allocate a buffer from the C heap to be appended to the _boot_modules_array
-    // or the _platform_modules_array.
-    char* temp_name = NEW_C_HEAP_ARRAY(char, (size_t)(end_ptr - begin_ptr + 1), mtInternal);
-    strncpy(temp_name, begin_ptr, end_ptr - begin_ptr);
-    temp_name[end_ptr - begin_ptr] = '\0';
-    if (strncmp(temp_name, "BOOT", 4) == 0) {
-      process_boot_modules = true;
-      FREE_C_HEAP_ARRAY(char, temp_name);
-    } else if (strncmp(temp_name, "PLATFORM", 8) == 0) {
-      process_boot_modules = false;
-      FREE_C_HEAP_ARRAY(char, temp_name);
-    } else {
-      // module name
-      if (process_boot_modules) {
-        _boot_modules_array->append(temp_name);
-      } else {
-        _platform_modules_array->append(temp_name);
-      }
-    }
-    begin_ptr = ++end_ptr;
-    end_ptr = strchr(begin_ptr, '\n');
-  }
-}
-#endif
-
 // Function add_package extracts the package from the fully qualified class name
 // and checks if the package is in the boot loader's package entry table.  If so,
 // then it sets the classpath_index in the package entry record.
@@ -1290,58 +1234,6 @@
   return result();
 }
 
-#if INCLUDE_CDS
-s2 ClassLoader::module_to_classloader(const char* module_name) {
-
-  assert(DumpSharedSpaces, "dump time only");
-  assert(_boot_modules_array != NULL, "_boot_modules_array is NULL");
-  assert(_platform_modules_array != NULL, "_platform_modules_array is NULL");
-
-  int array_size = _boot_modules_array->length();
-  for (int i = 0; i < array_size; i++) {
-    if (strcmp(module_name, _boot_modules_array->at(i)) == 0) {
-      return BOOT_LOADER;
-    }
-  }
-
-  array_size = _platform_modules_array->length();
-  for (int i = 0; i < array_size; i++) {
-    if (strcmp(module_name, _platform_modules_array->at(i)) == 0) {
-      return PLATFORM_LOADER;
-    }
-  }
-
-  return APP_LOADER;
-}
-
-s2 ClassLoader::classloader_type(Symbol* class_name, ClassPathEntry* e, int classpath_index, TRAPS) {
-  assert(DumpSharedSpaces, "Only used for CDS dump time");
-
-  // obtain the classloader type based on the class name.
-  // First obtain the package name based on the class name. Then obtain
-  // the classloader type based on the package name from the jimage using
-  // a jimage API. If the classloader type cannot be found from the
-  // jimage, it is determined by the class path entry.
-  jshort loader_type = ClassLoader::APP_LOADER;
-  if (e->is_jrt()) {
-    ResourceMark rm;
-    TempNewSymbol pkg_name = InstanceKlass::package_from_name(class_name, CHECK_0);
-    if (pkg_name != NULL) {
-      const char* pkg_name_C_string = (const char*)(pkg_name->as_C_string());
-      ClassPathImageEntry* cpie = (ClassPathImageEntry*)e;
-      JImageFile* jimage = cpie->jimage();
-      char* module_name = (char*)(*JImagePackageToModule)(jimage, pkg_name_C_string);
-      if (module_name != NULL) {
-        loader_type = ClassLoader::module_to_classloader(module_name);
-      }
-    }
-  } else if (ClassLoaderExt::is_boot_classpath(classpath_index)) {
-    loader_type = ClassLoader::BOOT_LOADER;
-  }
-  return loader_type;
-}
-#endif
-
 // caller needs ResourceMark
 const char* ClassLoader::file_name_for_class_name(const char* class_name,
                                                   int class_name_len) {
--- a/src/hotspot/share/classfile/classLoader.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/classLoader.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -37,13 +37,6 @@
 // Name of boot "modules" image
 #define  MODULES_IMAGE_NAME "modules"
 
-// Name of the resource containing mapping from module names to defining class loader type
-#define MODULE_LOADER_MAP "jdk/internal/vm/cds/resources/ModuleLoaderMap.dat"
-
-// Initial sizes of the following arrays are based on the generated ModuleLoaderMap.dat
-#define INITIAL_BOOT_MODULES_ARRAY_SIZE 30
-#define INITIAL_PLATFORM_MODULES_ARRAY_SIZE  15
-
 // Class path entry (directory or zip file)
 
 class JImageFile;
@@ -403,7 +396,8 @@
   static int compute_Object_vtable();
 
   static ClassPathEntry* classpath_entry(int n) {
-    assert(n >= 0 && n < _num_entries, "sanity");
+    assert(n >= 0, "sanity");
+    assert(!has_jrt_entry() || n < _num_entries, "sanity");
     if (n == 0) {
       assert(has_jrt_entry(), "No class path entry at 0 for exploded module builds");
       return ClassLoader::_jrt_entry;
@@ -438,10 +432,6 @@
   static bool  check_shared_paths_misc_info(void* info, int size);
   static void  exit_with_path_failure(const char* error, const char* message);
 
-  static s2 module_to_classloader(const char* module_name);
-  static void initialize_module_loader_map(JImageFile* jimage);
-  static s2 classloader_type(Symbol* class_name, ClassPathEntry* e,
-                             int classpath_index, TRAPS);
   static void record_shared_class_loader_type(InstanceKlass* ik, const ClassFileStream* stream);
 #endif
   static JImageLocationRef jimage_find_resource(JImageFile* jf, const char* module_name,
--- a/src/hotspot/share/classfile/classLoaderData.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/classLoaderData.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -98,7 +98,8 @@
   _keep_alive((is_anonymous || h_class_loader.is_null()) ? 1 : 0),
   _metaspace(NULL), _unloading(false), _klasses(NULL),
   _modules(NULL), _packages(NULL),
-  _claimed(0), _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
+  _claimed(0), _modified_oops(true), _accumulated_modified_oops(false),
+  _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
   _next(NULL), _dependencies(dependencies),
   _metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true,
                             Monitor::_safepoint_check_never)) {
@@ -207,7 +208,7 @@
   oops_do(&cl);
   return cl.found();
 }
-#endif
+#endif // ASSERT
 
 bool ClassLoaderData::claim() {
   if (_claimed == 1) {
@@ -236,19 +237,19 @@
   }
 }
 
-void ClassLoaderData::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderData::oops_do(OopClosure* f, bool must_claim, bool clear_mod_oops) {
   if (must_claim && !claim()) {
     return;
   }
 
+  // Only clear modified_oops after the ClassLoaderData is claimed.
+  if (clear_mod_oops) {
+    clear_modified_oops();
+  }
+
   f->do_oop(&_class_loader);
   _dependencies.oops_do(f);
-
   _handles.oops_do(f);
-
-  if (klass_closure != NULL) {
-    classes_do(klass_closure);
-  }
 }
 
 void ClassLoaderData::Dependencies::oops_do(OopClosure* f) {
@@ -368,6 +369,9 @@
   // Must handle over GC point.
   Handle dependency(THREAD, to);
   from_cld->_dependencies.add(dependency, CHECK);
+
+  // Added a potentially young gen oop to the ClassLoaderData
+  record_modified_oops();
 }
 
 
@@ -764,6 +768,7 @@
 
 OopHandle ClassLoaderData::add_handle(Handle h) {
   MutexLockerEx ml(metaspace_lock(),  Mutex::_no_safepoint_check_flag);
+  record_modified_oops();
   return OopHandle(_handles.add(h()));
 }
 
@@ -875,8 +880,7 @@
   if (Verbose) {
     Klass* k = _klasses;
     while (k != NULL) {
-      out->print_cr("klass " PTR_FORMAT ", %s, CT: %d, MUT: %d", k, k->name()->as_C_string(),
-          k->has_modified_oops(), k->has_accumulated_modified_oops());
+      out->print_cr("klass " PTR_FORMAT ", %s", p2i(k), k->name()->as_C_string());
       assert(k != k->next_link(), "no loops!");
       k = k->next_link();
     }
@@ -1003,25 +1007,25 @@
 }
 
 
-void ClassLoaderDataGraph::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::oops_do(OopClosure* f, bool must_claim) {
   for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
-    cld->oops_do(f, klass_closure, must_claim);
+    cld->oops_do(f, must_claim);
   }
 }
 
-void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, bool must_claim) {
   for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
     if (cld->keep_alive()) {
-      cld->oops_do(f, klass_closure, must_claim);
+      cld->oops_do(f, must_claim);
     }
   }
 }
 
-void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, bool must_claim) {
   if (ClassUnloading) {
-    keep_alive_oops_do(f, klass_closure, must_claim);
+    keep_alive_oops_do(f, must_claim);
   } else {
-    oops_do(f, klass_closure, must_claim);
+    oops_do(f, must_claim);
   }
 }
 
--- a/src/hotspot/share/classfile/classLoaderData.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/classLoaderData.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -87,9 +87,9 @@
   static void purge();
   static void clear_claimed_marks();
   // oops do
-  static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
-  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
-  static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+  static void oops_do(OopClosure* f, bool must_claim);
+  static void keep_alive_oops_do(OopClosure* blk, bool must_claim);
+  static void always_strong_oops_do(OopClosure* blk, bool must_claim);
   // cld do
   static void cld_do(CLDClosure* cl);
   static void cld_unloading_do(CLDClosure* cl);
@@ -230,10 +230,16 @@
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
   bool _is_anonymous;      // if this CLD is for an anonymous class
+
+  // Remembered sets support for the oops in the class loader data.
+  bool _modified_oops;             // Card Table Equivalent (YC/CMS support)
+  bool _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
+
   s2 _keep_alive;          // if this CLD is kept alive without a keep_alive_object().
                            // Used for anonymous classes and the boot class
                            // loader. _keep_alive does not need to be volatile or
                            // atomic since there is one unique CLD per anonymous class.
+
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
                            // Has to be an int because we cas it.
@@ -276,6 +282,19 @@
   bool claimed() const          { return _claimed == 1; }
   bool claim();
 
+  // CLDs are not placed in the Heap, so the Card Table or
+  // the Mod Union Table can't be used to mark when a CLD has modified oops.
+  // The CT and MUT bits save this information for the whole class loader data.
+  void clear_modified_oops()             { _modified_oops = false; }
+ public:
+  void record_modified_oops()            { _modified_oops = true; }
+  bool has_modified_oops()               { return _modified_oops; }
+
+  void accumulate_modified_oops()        { if (has_modified_oops()) _accumulated_modified_oops = true; }
+  void clear_accumulated_modified_oops() { _accumulated_modified_oops = false; }
+  bool has_accumulated_modified_oops()   { return _accumulated_modified_oops; }
+ private:
+
   void unload();
   bool keep_alive() const       { return _keep_alive > 0; }
   void classes_do(void f(Klass*));
@@ -346,8 +365,7 @@
 
   inline unsigned int identity_hash() const { return (unsigned int)(((intptr_t)this) >> 3); }
 
-  // Used when tracing from klasses.
-  void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
+  void oops_do(OopClosure* f, bool must_claim, bool clear_modified_oops = false);
 
   void classes_do(KlassClosure* klass_closure);
   Klass* klasses() { return _klasses; }
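
Taken together, the two new flags give each ClassLoaderData a card-table/mod-union equivalent for its off-heap oops (_class_loader, dependencies, _handles): mutators set _modified_oops when an oop is added, young collections skip CLDs whose flag is clear and clear it only once the CLD is claimed, and CMS folds the flag into _accumulated_modified_oops while concurrent marking runs. A condensed, single-threaded model of that protocol (an assumption-laden sketch, not HotSpot code):

    #include <cstdio>
    #include <vector>

    struct CLDModel {
      std::vector<const void*> handles;          // stands in for _handles
      bool modified_oops = true;                 // "card table" bit, starts dirty
      bool accumulated_modified_oops = false;    // "mod union" bit (CMS)

      void add_handle(const void* p) {
        handles.push_back(p);
        modified_oops = true;                    // record_modified_oops()
      }
      void young_gc_scan() {
        if (!modified_oops) return;              // skip clean CLDs entirely
        modified_oops = false;                   // clear only once the CLD is claimed
        std::printf("scanned %zu handles\n", handles.size());
      }
      void accumulate() {                        // called while concurrent marking is active
        if (modified_oops) accumulated_modified_oops = true;
      }
    };

    int main() {
      CLDModel cld;
      cld.add_handle(&cld);
      cld.accumulate();
      cld.young_gc_scan();
      std::printf("accumulated=%d\n", (int)cld.accumulated_modified_oops);
      return 0;
    }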
--- a/src/hotspot/share/classfile/defaultMethods.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/defaultMethods.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -767,15 +767,14 @@
 // This is the guts of the default methods implementation.  This is called just
 // after the classfile has been parsed if some ancestor has default methods.
 //
-// First if finds any name/signature slots that need any implementation (either
+// First it finds any name/signature slots that need any implementation (either
 // because they are miranda or a superclass's implementation is an overpass
 // itself).  For each slot, iterate over the hierarchy, to see if they contain a
 // signature that matches the slot we are looking at.
 //
-// For each slot filled, we generate an overpass method that either calls the
-// unique default method candidate using invokespecial, or throws an exception
-// (in the case of no default method candidates, or more than one valid
-// candidate).  These methods are then added to the class's method list.
+// For each slot filled, we either record the default method candidate in the
+// klass default_methods list or, only to handle exception cases, we create an
+// overpass method that throws an exception and add it to the klass methods list.
 // The JVM does not create bridges nor handle generic signatures here.
 void DefaultMethods::generate_default_methods(
     InstanceKlass* klass, const GrowableArray<Method*>* mirandas, TRAPS) {
@@ -901,6 +900,11 @@
 // This allows virtual methods to override the overpass, but ensures
 // that a local method search will find the exception rather than an abstract
 // or default method that is not a valid candidate.
+//
+// Note that if overpass methods are ever created that are not exception
+// throwing methods then the loader constraint checking logic for vtable and
+// itable creation needs to be changed to check loader constraints for the
+// overpass methods that do not throw exceptions.
 static void create_defaults_and_exceptions(
     GrowableArray<EmptyVtableSlot*>* slots,
     InstanceKlass* klass, TRAPS) {
--- a/src/hotspot/share/classfile/javaClasses.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -889,7 +889,7 @@
 
     // Setup indirection from klass->mirror
     // after any exceptions can happen during allocations.
-    k->set_java_mirror(mirror());
+    k->set_java_mirror(mirror);
 
     // Set the module field in the java_lang_Class instance.  This must be done
     // after the mirror is set.
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -461,6 +461,8 @@
   template(getProtectionDomain_signature,             "(Ljava/security/CodeSource;)Ljava/security/ProtectionDomain;") \
   template(url_code_signer_array_void_signature,      "(Ljava/net/URL;[Ljava/security/CodeSigner;)V") \
   template(module_entry_name,                         "module_entry")                             \
+  template(resolved_references_name,                  "<resolved_references>")                    \
+  template(init_lock_name,                            "<init_lock>")                              \
                                                                                                   \
   /* name symbols needed by intrinsics */                                                         \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, template, VM_SYMBOL_IGNORE, VM_ALIAS_IGNORE) \
@@ -779,6 +781,7 @@
   do_name(decrementExact_name,"decrementExact")                                                                         \
   do_name(incrementExact_name,"incrementExact")                                                                         \
   do_name(multiplyExact_name,"multiplyExact")                                                                           \
+  do_name(multiplyHigh_name,"multiplyHigh")                                                                             \
   do_name(negateExact_name,"negateExact")                                                                               \
   do_name(subtractExact_name,"subtractExact")                                                                           \
   do_name(fma_name, "fma")                                                                                              \
@@ -803,6 +806,7 @@
   do_intrinsic(_incrementExactL,          java_lang_Math,         incrementExact_name, long_long_signature,      F_S)   \
   do_intrinsic(_multiplyExactI,           java_lang_Math,         multiplyExact_name, int2_int_signature,        F_S)   \
   do_intrinsic(_multiplyExactL,           java_lang_Math,         multiplyExact_name, long2_long_signature,      F_S)   \
+  do_intrinsic(_multiplyHigh,             java_lang_Math,         multiplyHigh_name, long2_long_signature,       F_S)   \
   do_intrinsic(_negateExactI,             java_lang_Math,         negateExact_name, int_int_signature,           F_S)   \
   do_intrinsic(_negateExactL,             java_lang_Math,         negateExact_name, long_long_signature,         F_S)   \
   do_intrinsic(_subtractExactI,           java_lang_Math,         subtractExact_name, int2_int_signature,        F_S)   \
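
The new _multiplyHigh entry wires java.lang.Math.multiplyHigh(long, long) (signature long2_long) up as a JIT intrinsic; the operation is the high 64 bits of the signed 128-bit product. A portable sketch of that arithmetic (assumes a compiler with __int128 and arithmetic right shift, e.g. GCC/Clang):

    #include <cstdint>
    #include <cstdio>

    // High 64 bits of a signed 64x64 -> 128 multiply, the operation the
    // intrinsic accelerates.
    static int64_t multiply_high(int64_t x, int64_t y) {
      return (int64_t)(((__int128)x * (__int128)y) >> 64);
    }

    int main() {
      std::printf("%lld\n", (long long)multiply_high(INT64_MAX, 2));  // prints 0
      std::printf("%lld\n", (long long)multiply_high(INT64_MIN, 2));  // prints -1
      return 0;
    }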
--- a/src/hotspot/share/compiler/compileBroker.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/compiler/compileBroker.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -332,7 +332,7 @@
   static void disable_compilation_forever() {
     UseCompiler               = false;
     AlwaysCompileLoopMethods  = false;
-    Atomic::xchg(shutdown_compilation, &_should_compile_new_jobs);
+    Atomic::xchg(jint(shutdown_compilation), &_should_compile_new_jobs);
   }
 
   static bool is_compilation_disabled_forever() {
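
The jint(...) cast above is needed because Atomic::xchg is now a template: the exchange value and the destination must deduce to the same type, and shutdown_compilation is an enum constant while _should_compile_new_jobs is a jint. A small sketch of the deduction constraint (illustrative names, not the HotSpot declarations):

    #include <cstdint>

    template<typename T>
    T xchg(T v, T volatile* dest) { T old = *dest; *dest = v; return old; }

    enum CompilerActivity { stop_compilation = 0, run_compilation = 1, shutdown_compilation = 2 };

    int main() {
      volatile int32_t should_compile = run_compilation;
      // xchg(shutdown_compilation, &should_compile);       // would not deduce: enum vs int32_t
      xchg(int32_t(shutdown_compilation), &should_compile); // cast makes both sides int32_t
      return should_compile == shutdown_compilation ? 0 : 1;
    }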
--- a/src/hotspot/share/compiler/methodMatcher.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/compiler/methodMatcher.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -96,7 +96,7 @@
   bool have_colon = (colon != NULL);
   if (have_colon) {
     // Don't allow multiple '::'
-    if (colon + 2 != '\0') {
+    if (colon[2] != '\0') {
       if (strstr(colon+2, "::")) {
         error_msg = "Method pattern only allows one '::' allowed";
         return false;
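
The one-character change above fixes a real bug: the old form compared the pointer colon + 2 against the character constant zero (so it was effectively always true), whereas colon[2] != '\0' inspects the character that follows the "::" separator. A tiny standalone illustration:

    #include <cstring>
    #include <cstdio>

    int main() {
      const char* pattern = "java/lang/String::";       // nothing after the separator
      const char* colon = std::strstr(pattern, "::");

      bool buggy = (colon + 2 != 0);      // compares a pointer with NULL: always true here
      bool fixed = (colon[2] != '\0');    // looks at the char after "::": false here

      std::printf("buggy=%d fixed=%d\n", (int)buggy, (int)fixed);
      return 0;
    }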
--- a/src/hotspot/share/compiler/oopMap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/compiler/oopMap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,9 +40,6 @@
 #ifdef COMPILER2
 #include "opto/optoreg.hpp"
 #endif
-#ifdef SPARC
-#include "vmreg_sparc.inline.hpp"
-#endif
 
 // OopMapStream
 
--- a/src/hotspot/share/gc/cms/cmsOopClosures.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/cmsOopClosures.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,12 +48,7 @@
 //       because some CMS OopClosures derive from OopsInGenClosure. It would be
 //       good to get rid of them completely.
 class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
-  KlassToOopClosure _klass_closure;
  public:
-  MetadataAwareOopsInGenClosure() {
-    _klass_closure.initialize(this);
-  }
-
   virtual bool do_metadata()    { return do_metadata_nv(); }
   inline  bool do_metadata_nv() { return true; }
 
--- a/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -40,10 +40,8 @@
 inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
 
 inline void MetadataAwareOopsInGenClosure::do_cld_nv(ClassLoaderData* cld) {
-  assert(_klass_closure._oop_closure == this, "Must be");
-
   bool claim = true;  // Must claim the class loader data before processing.
-  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
+  cld->oops_do(this, claim);
 }
 
 // Decode the oop and call do_oop on it.
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1075,7 +1075,7 @@
                         obj_ptr, old->is_objArray(), word_sz);
 
   NOT_PRODUCT(
-    Atomic::inc_ptr(&_numObjectsPromoted);
+    Atomic::inc(&_numObjectsPromoted);
     Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
   )
 
@@ -1553,9 +1553,10 @@
   assert(_collectorState != Idling || _modUnionTable.isAllClear(),
     "_modUnionTable should be clear if the baton was not passed");
   _modUnionTable.clear_all();
-  assert(_collectorState != Idling || _ct->klass_rem_set()->mod_union_is_clear(),
+  assert(_collectorState != Idling || _ct->cld_rem_set()->mod_union_is_clear(),
     "mod union for klasses should be clear if the baton was passed");
-  _ct->klass_rem_set()->clear_mod_union();
+  _ct->cld_rem_set()->clear_mod_union();
+
 
   // We must adjust the allocation statistics being maintained
   // in the free list space. We do so by reading and clearing
@@ -2025,7 +2026,7 @@
   // that information. Tell the young collection to save the union of all
   // modified klasses.
   if (duringMarking) {
-    _ct->klass_rem_set()->set_accumulate_modified_oops(true);
+    _ct->cld_rem_set()->set_accumulate_modified_oops(true);
   }
 
   bool registerClosure = duringMarking;
@@ -2101,7 +2102,7 @@
   assert(haveFreelistLocks(), "must have freelist locks");
   assert_lock_strong(bitMapLock());
 
-  _ct->klass_rem_set()->set_accumulate_modified_oops(false);
+  _ct->cld_rem_set()->set_accumulate_modified_oops(false);
 
   _cmsGen->gc_epilogue_work(full);
 
@@ -2380,18 +2381,18 @@
   }
 }
 
-class VerifyKlassOopsKlassClosure : public KlassClosure {
-  class VerifyKlassOopsClosure : public OopClosure {
+class VerifyCLDOopsCLDClosure : public CLDClosure {
+  class VerifyCLDOopsClosure : public OopClosure {
     CMSBitMap* _bitmap;
    public:
-    VerifyKlassOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
+    VerifyCLDOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
     void do_oop(oop* p)       { guarantee(*p == NULL || _bitmap->isMarked((HeapWord*) *p), "Should be marked"); }
     void do_oop(narrowOop* p) { ShouldNotReachHere(); }
   } _oop_closure;
  public:
-  VerifyKlassOopsKlassClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
-  void do_klass(Klass* k) {
-    k->oops_do(&_oop_closure);
+  VerifyCLDOopsCLDClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
+  void do_cld(ClassLoaderData* cld) {
+    cld->oops_do(&_oop_closure, false, false);
   }
 };
 
@@ -2437,8 +2438,8 @@
   assert(verification_mark_stack()->isEmpty(), "Should have been drained");
   verify_work_stacks_empty();
 
-  VerifyKlassOopsKlassClosure verify_klass_oops(verification_mark_bm());
-  ClassLoaderDataGraph::classes_do(&verify_klass_oops);
+  VerifyCLDOopsCLDClosure verify_cld_oops(verification_mark_bm());
+  ClassLoaderDataGraph::cld_do(&verify_cld_oops);
 
   // Marking completed -- now verify that each bit marked in
   // verification_mark_bm() is also marked in markBitMap(); flag all
@@ -2911,7 +2912,7 @@
        " or no bits are set in the gc_prologue before the start of the next "
        "subsequent marking phase.");
 
-  assert(_ct->klass_rem_set()->mod_union_is_clear(), "Must be");
+  assert(_ct->cld_rem_set()->mod_union_is_clear(), "Must be");
 
   // Save the end of the used_region of the constituent generations
   // to be used to limit the extent of sweep in each generation.
@@ -3848,7 +3849,7 @@
     }
   }
 
-  preclean_klasses(&mrias_cl, _cmsGen->freelistLock());
+  preclean_cld(&mrias_cl, _cmsGen->freelistLock());
 
   curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
   cumNumCards += curNumCards;
@@ -4067,21 +4068,21 @@
   return cumNumDirtyCards;
 }
 
-class PrecleanKlassClosure : public KlassClosure {
-  KlassToOopClosure _cm_klass_closure;
+class PrecleanCLDClosure : public CLDClosure {
+  MetadataAwareOopsInGenClosure* _cm_closure;
  public:
-  PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
-  void do_klass(Klass* k) {
-    if (k->has_accumulated_modified_oops()) {
-      k->clear_accumulated_modified_oops();
-
-      _cm_klass_closure.do_klass(k);
+  PrecleanCLDClosure(MetadataAwareOopsInGenClosure* oop_closure) : _cm_closure(oop_closure) {}
+  void do_cld(ClassLoaderData* cld) {
+    if (cld->has_accumulated_modified_oops()) {
+      cld->clear_accumulated_modified_oops();
+
+      _cm_closure->do_cld(cld);
     }
   }
 };
 
 // The freelist lock is needed to prevent asserts, is it really needed?
-void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
+void CMSCollector::preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
 
   cl->set_freelistLock(freelistLock);
 
@@ -4089,8 +4090,8 @@
 
   // SSS: Add equivalent to ScanMarkedObjectsAgainCarefullyClosure::do_yield_check and should_abort_preclean?
   // SSS: We should probably check if precleaning should be aborted, at suitable intervals?
-  PrecleanKlassClosure preclean_klass_closure(cl);
-  ClassLoaderDataGraph::classes_do(&preclean_klass_closure);
+  PrecleanCLDClosure preclean_closure(cl);
+  ClassLoaderDataGraph::cld_do(&preclean_closure);
 
   verify_work_stacks_empty();
   verify_overflow_empty();
@@ -4250,7 +4251,7 @@
   // Call isAllClear() under bitMapLock
   assert(_modUnionTable.isAllClear(),
       "Should be clear by end of the final marking");
-  assert(_ct->klass_rem_set()->mod_union_is_clear(),
+  assert(_ct->cld_rem_set()->mod_union_is_clear(),
       "Should be clear by end of the final marking");
 }
 
@@ -4332,26 +4333,26 @@
   void do_work_steal(int i, ParMarkRefsIntoAndScanClosure* cl, int* seed);
 };
 
-class RemarkKlassClosure : public KlassClosure {
-  KlassToOopClosure _cm_klass_closure;
+class RemarkCLDClosure : public CLDClosure {
+  CLDToOopClosure _cm_closure;
  public:
-  RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
-  void do_klass(Klass* k) {
-    // Check if we have modified any oops in the Klass during the concurrent marking.
-    if (k->has_accumulated_modified_oops()) {
-      k->clear_accumulated_modified_oops();
+  RemarkCLDClosure(OopClosure* oop_closure) : _cm_closure(oop_closure) {}
+  void do_cld(ClassLoaderData* cld) {
+    // Check if we have modified any oops in the CLD during the concurrent marking.
+    if (cld->has_accumulated_modified_oops()) {
+      cld->clear_accumulated_modified_oops();
 
       // We could have transfered the current modified marks to the accumulated marks,
       // like we do with the Card Table to Mod Union Table. But it's not really necessary.
-    } else if (k->has_modified_oops()) {
+    } else if (cld->has_modified_oops()) {
       // Don't clear anything, this info is needed by the next young collection.
     } else {
-      // No modified oops in the Klass.
+      // No modified oops in the ClassLoaderData.
       return;
     }
 
     // The klass has modified fields, need to scan the klass.
-    _cm_klass_closure.do_klass(k);
+    _cm_closure.do_cld(cld);
   }
 };
 
@@ -4439,24 +4440,24 @@
     log_trace(gc, task)("Finished unhandled CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
   }
 
-  // ---------- dirty klass scanning ----------
+  // We might have added oops to ClassLoaderData::_handles during the
+  // concurrent marking phase. These oops do not always point to newly allocated objects
+  // that are guaranteed to be kept alive.  Hence,
+  // we do have to revisit the _handles block during the remark phase.
+
+  // ---------- dirty CLD scanning ----------
   if (worker_id == 0) { // Single threaded at the moment.
     _timer.reset();
     _timer.start();
 
     // Scan all classes that was dirtied during the concurrent marking phase.
-    RemarkKlassClosure remark_klass_closure(&par_mrias_cl);
-    ClassLoaderDataGraph::classes_do(&remark_klass_closure);
+    RemarkCLDClosure remark_closure(&par_mrias_cl);
+    ClassLoaderDataGraph::cld_do(&remark_closure);
 
     _timer.stop();
-    log_trace(gc, task)("Finished dirty klass scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
-  }
-
-  // We might have added oops to ClassLoaderData::_handles during the
-  // concurrent marking phase. These oops point to newly allocated objects
-  // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the roots. Hence,
-  // we don't have to revisit the _handles block during the remark phase.
+    log_trace(gc, task)("Finished dirty CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
+  }
+
 
   // ---------- rescan dirty cards ------------
   _timer.reset();
@@ -4981,23 +4982,21 @@
     verify_work_stacks_empty();
   }
 
+  // We might have added oops to ClassLoaderData::_handles during the
+  // concurrent marking phase. These oops do not always point to newly allocated objects
+  // that are guaranteed to be kept alive.  Hence,
+  // we do have to revisit the _handles block during the remark phase.
   {
-    GCTraceTime(Trace, gc, phases) t("Dirty Klass Scan", _gc_timer_cm);
+    GCTraceTime(Trace, gc, phases) t("Dirty CLD Scan", _gc_timer_cm);
 
     verify_work_stacks_empty();
 
-    RemarkKlassClosure remark_klass_closure(&mrias_cl);
-    ClassLoaderDataGraph::classes_do(&remark_klass_closure);
+    RemarkCLDClosure remark_closure(&mrias_cl);
+    ClassLoaderDataGraph::cld_do(&remark_closure);
 
     verify_work_stacks_empty();
   }
 
-  // We might have added oops to ClassLoaderData::_handles during the
-  // concurrent marking phase. These oops point to newly allocated objects
-  // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the roots. Hence,
-  // we don't have to revisit the _handles block during the remark phase.
-
   verify_work_stacks_empty();
   // Restore evacuated mark words, if any, used for overflow list links
   restore_preserved_marks_if_any();
@@ -7974,7 +7973,7 @@
 
 // Multi-threaded; use CAS to prepend to overflow list
 void CMSCollector::par_push_on_overflow_list(oop p) {
-  NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
+  NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
   assert(oopDesc::is_oop(p), "Not an oop");
   par_preserve_mark_if_necessary(p);
   oop observed_overflow_list = _overflow_list;
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -777,7 +777,7 @@
   // Does precleaning work, returning a quantity indicative of
   // the amount of "useful work" done.
   size_t preclean_work(bool clean_refs, bool clean_survivors);
-  void preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
+  void preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
   void abortable_preclean(); // Preclean while looking for possible abort
   void initialize_sequential_subtasks_for_young_gen_rescan(int i);
   // Helper function for above; merge-sorts the per-thread plab samples
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -493,7 +493,7 @@
 
 ParScanClosure::ParScanClosure(ParNewGeneration* g,
                                ParScanThreadState* par_scan_state) :
-  OopsInKlassOrGenClosure(g), _par_scan_state(par_scan_state), _g(g) {
+  OopsInClassLoaderDataOrGenClosure(g), _par_scan_state(par_scan_state), _g(g) {
   _boundary = _g->reserved().end();
 }
 
@@ -601,11 +601,8 @@
 
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
-  KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
-                                      gch->rem_set()->klass_rem_set());
-  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
-                                           &par_scan_state.to_space_root_closure(),
-                                           false);
+  CLDScanClosure cld_scan_closure(&par_scan_state.to_space_root_closure(),
+                                  gch->rem_set()->cld_rem_set()->accumulate_modified_oops());
 
   par_scan_state.start_strong_roots();
   gch->young_process_roots(_strong_roots_scope,
@@ -1281,7 +1278,7 @@
     // XXX This is horribly inefficient when a promotion failure occurs
     // and should be fixed. XXX FIX ME !!!
 #ifndef PRODUCT
-    Atomic::inc_ptr(&_num_par_pushes);
+    Atomic::inc(&_num_par_pushes);
     assert(_num_par_pushes > 0, "Tautology");
 #endif
     if (from_space_obj->forwardee() == from_space_obj) {
--- a/src/hotspot/share/gc/cms/parOopClosures.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/parOopClosures.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@
 typedef GenericTaskQueueSet<ObjToScanQueue, mtGC> ObjToScanQueueSet;
 class ParallelTaskTerminator;
 
-class ParScanClosure: public OopsInKlassOrGenClosure {
+class ParScanClosure: public OopsInClassLoaderDataOrGenClosure {
  protected:
   ParScanThreadState* _par_scan_state;
   ParNewGeneration*   _g;
--- a/src/hotspot/share/gc/cms/parOopClosures.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/cms/parOopClosures.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,8 +126,8 @@
           (void)_par_scan_state->trim_queues(10 * ParallelGCThreads);
         }
       }
-      if (is_scanning_a_klass()) {
-        do_klass_barrier();
+      if (is_scanning_a_cld()) {
+        do_cld_barrier();
       } else if (gc_barrier) {
         // Now call parent closure
         par_do_barrier(p);
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -63,7 +63,6 @@
 class GenerationSpec;
 class G1ParScanThreadState;
 class G1ParScanThreadStateSet;
-class G1KlassScanClosure;
 class G1ParScanThreadState;
 class ObjectClosure;
 class SpaceClosure;
--- a/src/hotspot/share/gc/g1/g1HeapVerifier.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1HeapVerifier.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -161,18 +161,18 @@
   void reset_count() { _count = 0; };
 };
 
-class VerifyKlassClosure: public KlassClosure {
+class VerifyCLDClosure: public CLDClosure {
   YoungRefCounterClosure _young_ref_counter_closure;
   OopClosure *_oop_closure;
  public:
-  VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
-  void do_klass(Klass* k) {
-    k->oops_do(_oop_closure);
+  VerifyCLDClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
+  void do_cld(ClassLoaderData* cld) {
+    cld->oops_do(_oop_closure, false);
 
     _young_ref_counter_closure.reset_count();
-    k->oops_do(&_young_ref_counter_closure);
+    cld->oops_do(&_young_ref_counter_closure, false);
     if (_young_ref_counter_closure.count() > 0) {
-      guarantee(k->has_modified_oops(), "Klass " PTR_FORMAT ", has young refs but is not dirty.", p2i(k));
+      guarantee(cld->has_modified_oops(), "CLD " PTR_FORMAT " has %d young refs but is not dirty.", p2i(cld), _young_ref_counter_closure.count());
     }
   }
 };
@@ -390,8 +390,7 @@
 
   log_debug(gc, verify)("Roots");
   VerifyRootsClosure rootsCl(vo);
-  VerifyKlassClosure klassCl(_g1h, &rootsCl);
-  CLDToKlassAndOopClosure cldCl(&klassCl, &rootsCl, false);
+  VerifyCLDClosure cldCl(_g1h, &rootsCl);
 
   // We apply the relevant closures to all the oops in the
   // system dictionary, class loader data graph, the string table
--- a/src/hotspot/share/gc/g1/g1OopClosures.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1OopClosures.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
   _g1(g1),
   _par_scan_state(par_scan_state),
   _worker_id(par_scan_state->worker_id()),
-  _scanned_klass(NULL),
+  _scanned_cld(NULL),
   _cm(_g1->concurrent_mark())
 { }
 
@@ -42,20 +42,20 @@
   _g1(g1), _par_scan_state(par_scan_state), _from(NULL)
 { }
 
-void G1KlassScanClosure::do_klass(Klass* klass) {
-  // If the klass has not been dirtied we know that there's
+void G1CLDScanClosure::do_cld(ClassLoaderData* cld) {
+  // If the class loader data has not been dirtied we know that there's
   // no references into  the young gen and we can skip it.
-  if (!_process_only_dirty || klass->has_modified_oops()) {
-    // Clean the klass since we're going to scavenge all the metadata.
-    klass->clear_modified_oops();
+  if (!_process_only_dirty || cld->has_modified_oops()) {
 
-    // Tell the closure that this klass is the Klass to scavenge
+    // Tell the closure that this class loader data is the CLD to scavenge
     // and is the one to dirty if oops are left pointing into the young gen.
-    _closure->set_scanned_klass(klass);
+    _closure->set_scanned_cld(cld);
 
-    klass->oops_do(_closure);
+    // Clean the cld since we're going to scavenge all the metadata.
+    // Clear modified oops only if this cld is claimed.
+    cld->oops_do(_closure, _must_claim, /*clear_modified_oops*/true);
 
-    _closure->set_scanned_klass(NULL);
+    _closure->set_scanned_cld(NULL);
   }
   _count++;
 }
--- a/src/hotspot/share/gc/g1/g1OopClosures.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1OopClosures.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -107,7 +107,7 @@
   G1CollectedHeap* _g1;
   G1ParScanThreadState* _par_scan_state;
   uint _worker_id;              // Cache value from par_scan_state.
-  Klass* _scanned_klass;
+  ClassLoaderData* _scanned_cld;
   G1ConcurrentMark* _cm;
 
   // Mark the object if it's not already marked. This is used to mark
@@ -124,13 +124,13 @@
   ~G1ParCopyHelper() { }
 
  public:
-  void set_scanned_klass(Klass* k) { _scanned_klass = k; }
-  template <class T> inline void do_klass_barrier(T* p, oop new_obj);
+  void set_scanned_cld(ClassLoaderData* cld) { _scanned_cld = cld; }
+  inline void do_cld_barrier(oop new_obj);
 };
 
 enum G1Barrier {
   G1BarrierNone,
-  G1BarrierKlass
+  G1BarrierCLD
 };
 
 enum G1Mark {
@@ -150,14 +150,16 @@
   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 };
 
-class G1KlassScanClosure : public KlassClosure {
+class G1CLDScanClosure : public CLDClosure {
  G1ParCopyHelper* _closure;
  bool             _process_only_dirty;
+ bool             _must_claim;
  int              _count;
  public:
-  G1KlassScanClosure(G1ParCopyHelper* closure, bool process_only_dirty)
-      : _process_only_dirty(process_only_dirty), _closure(closure), _count(0) {}
-  void do_klass(Klass* klass);
+  G1CLDScanClosure(G1ParCopyHelper* closure,
+                   bool process_only_dirty, bool must_claim)
+      : _process_only_dirty(process_only_dirty), _must_claim(must_claim), _closure(closure), _count(0) {}
+  void do_cld(ClassLoaderData* cld);
 };
 
 // Closure for iterating over object fields during concurrent marking
--- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -195,10 +195,9 @@
   }
 }
 
-template <class T>
-void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
+void G1ParCopyHelper::do_cld_barrier(oop new_obj) {
   if (_g1->heap_region_containing(new_obj)->is_young()) {
-    _scanned_klass->record_modified_oops();
+    _scanned_cld->record_modified_oops();
   }
 }
 
@@ -249,8 +248,8 @@
       mark_forwarded_object(obj, forwardee);
     }
 
-    if (barrier == G1BarrierKlass) {
-      do_klass_barrier(p, forwardee);
+    if (barrier == G1BarrierCLD) {
+      do_cld_barrier(forwardee);
     }
   } else {
     if (state.is_humongous()) {
@@ -267,5 +266,4 @@
     }
   }
 }
-
 #endif // SHARE_VM_GC_G1_G1OOPCLOSURES_INLINE_HPP
--- a/src/hotspot/share/gc/g1/g1SharedClosures.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1SharedClosures.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,18 +34,17 @@
 template <G1Mark Mark, bool use_ext = false>
 class G1SharedClosures VALUE_OBJ_CLASS_SPEC {
 public:
-  G1ParCopyClosure<G1BarrierNone,  Mark, use_ext> _oops;
-  G1ParCopyClosure<G1BarrierKlass, Mark, use_ext> _oop_in_klass;
-  G1KlassScanClosure                              _klass_in_cld_closure;
-  CLDToKlassAndOopClosure                         _clds;
-  G1CodeBlobClosure                               _codeblobs;
-  BufferingOopClosure                             _buffered_oops;
+  G1ParCopyClosure<G1BarrierNone, Mark, use_ext> _oops;
+  G1ParCopyClosure<G1BarrierCLD,  Mark, use_ext> _oops_in_cld;
 
-  G1SharedClosures(G1CollectedHeap* g1h, G1ParScanThreadState* pss, bool process_only_dirty_klasses, bool must_claim_cld) :
+  G1CLDScanClosure                _clds;
+  G1CodeBlobClosure               _codeblobs;
+  BufferingOopClosure             _buffered_oops;
+
+  G1SharedClosures(G1CollectedHeap* g1h, G1ParScanThreadState* pss, bool process_only_dirty, bool must_claim_cld) :
     _oops(g1h, pss),
-    _oop_in_klass(g1h, pss),
-    _klass_in_cld_closure(&_oop_in_klass, process_only_dirty_klasses),
-    _clds(&_klass_in_cld_closure, &_oops, must_claim_cld),
+    _oops_in_cld(g1h, pss),
+    _clds(&_oops_in_cld, process_only_dirty, must_claim_cld),
     _codeblobs(&_oops),
     _buffered_oops(&_oops) {}
 };
--- a/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -90,7 +90,7 @@
     }
   } else {
     // Queue is full, drop the string and update the statistics
-    Atomic::inc_ptr(&_queue->_dropped);
+    Atomic::inc(&_queue->_dropped);
   }
 }
 
--- a/src/hotspot/share/gc/g1/heapRegionType.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/heapRegionType.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -32,6 +32,8 @@
   assert(is_valid((tag)), "invalid HR type: %u", (uint) (tag))
 
 class HeapRegionType VALUE_OBJ_CLASS_SPEC {
+friend class VMStructs;
+
 private:
   // We encode the value of the heap region type so the generation can be
   // determined quickly. The tag is split into two parts:
--- a/src/hotspot/share/gc/g1/vmStructs_g1.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/g1/vmStructs_g1.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,10 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
+  nonstatic_field(HeapRegion, _type,          HeapRegionType)                 \
+                                                                              \
+  nonstatic_field(HeapRegionType, _tag,       HeapRegionType::Tag volatile)   \
+                                                                              \
   nonstatic_field(G1ContiguousSpace, _top,              HeapWord* volatile)   \
                                                                               \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
@@ -67,9 +71,16 @@
 
 
 #define VM_INT_CONSTANTS_G1(declare_constant, declare_constant_with_value)    \
+  declare_constant(HeapRegionType::FreeTag)                                   \
+  declare_constant(HeapRegionType::YoungMask)                                 \
+  declare_constant(HeapRegionType::HumongousMask)                             \
+  declare_constant(HeapRegionType::PinnedMask)                                \
+  declare_constant(HeapRegionType::OldMask)
 
 
-#define VM_TYPES_G1(declare_type, declare_toplevel_type)                      \
+#define VM_TYPES_G1(declare_type,                                             \
+                    declare_toplevel_type,                                    \
+                    declare_integer_type)                                     \
                                                                               \
   declare_toplevel_type(G1HeapRegionTable)                                    \
                                                                               \
@@ -81,9 +92,12 @@
   declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(G1MonitoringSupport)                                  \
   declare_toplevel_type(PtrQueue)                                             \
+  declare_toplevel_type(HeapRegionType)                                       \
                                                                               \
   declare_toplevel_type(G1CollectedHeap*)                                     \
   declare_toplevel_type(HeapRegion*)                                          \
   declare_toplevel_type(G1MonitoringSupport*)                                 \
+                                                                              \
+  declare_integer_type(HeapRegionType::Tag volatile)
 
 #endif // SHARE_VM_GC_G1_VMSTRUCTS_G1_HPP
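
The vmStructs_g1.hpp hunks export HeapRegion::_type, HeapRegionType::_tag and the tag constants through the usual X-macro tables so that external tools such as the serviceability agent can read a region's type. For readers unfamiliar with the table style, here is a self-contained illustration of the X-macro pattern itself (not the actual VMStructs machinery):

    #include <cstddef>
    #include <cstdio>

    struct HeapRegionTypeLike { int _tag; };

    // One table of fields, expandable with different "callbacks".
    #define MY_FIELDS(nonstatic_field) \
      nonstatic_field(HeapRegionTypeLike, _tag, int)

    // One possible expansion: print each field's offset.
    #define PRINT_FIELD(klass, field, type) \
      std::printf(#klass "::" #field " at offset %zu\n", offsetof(klass, field));

    int main() {
      MY_FIELDS(PRINT_FIELD)
      return 0;
    }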
--- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -89,7 +89,7 @@
     const idx_t end_bit = addr_to_bit(addr + size - 1);
     bool end_bit_ok = _end_bits.par_set_bit(end_bit);
     assert(end_bit_ok, "concurrency problem");
-    DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count));
+    DEBUG_ONLY(Atomic::inc(&mark_bitmap_count));
     DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size));
     return true;
   }
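
The counter update above moves from the pointer-width Atomic::inc_ptr to the templated Atomic::inc, which infers the operand type of the debug-only counter. A standalone analogue using std::atomic, assuming only that the counter is a size_t bumped by multiple GC worker threads (DEBUG_ONLY and HotSpot's Atomic class are not reproduced here):

#include <atomic>
#include <cstddef>
#include <cstdio>
#include <thread>

// Stand-in for a debug-only size_t counter incremented by several GC
// worker threads; HotSpot's Atomic::inc provides the same fetch-and-add
// semantics for integral types.
static std::atomic<size_t> mark_bitmap_count{0};

int main() {
  std::thread a([] { for (int i = 0; i < 1000; i++) mark_bitmap_count.fetch_add(1); });
  std::thread b([] { for (int i = 0; i < 1000; i++) mark_bitmap_count.fetch_add(1); });
  a.join(); b.join();
  printf("count = %zu\n", mark_bitmap_count.load());  // 2000
  return 0;
}
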
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -574,16 +574,10 @@
 }
 
 void ParallelScavengeHeap::print_tracing_info() const {
-  if (TraceYoungGenTime) {
-    double time = PSScavenge::accumulated_time()->seconds();
-    tty->print_cr("[Accumulated GC generation 0 time %3.7f secs]", time);
-  }
-  if (TraceOldGenTime) {
-    double time = UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds();
-    tty->print_cr("[Accumulated GC generation 1 time %3.7f secs]", time);
-  }
-
   AdaptiveSizePolicyOutput::print();
+  log_debug(gc, heap, exit)("Accumulated young generation GC time %3.7f secs", PSScavenge::accumulated_time()->seconds());
+  log_debug(gc, heap, exit)("Accumulated old generation GC time %3.7f secs",
+      UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds());
 }
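
The timing report above switches from the TraceYoungGenTime/TraceOldGenTime flags to unified logging on the gc+heap+exit tags at Debug level, so the output is requested with -Xlog:gc+heap+exit=debug. A minimal standalone sketch of the guard-then-log shape, with plain stand-ins for HotSpot's log_is_enabled/log_debug macros:

#include <cstdio>

// Keep the accumulating timers meaningful only when the log level that
// will print them is enabled, then emit the totals at VM exit.
struct Timer { double seconds = 0.0; };

static bool log_enabled_gc_heap_exit_debug = true;  // stands in for log_is_enabled(Debug, gc, heap, exit)

static void print_tracing_info(const Timer& young, const Timer& old_gen) {
  if (log_enabled_gc_heap_exit_debug) {
    printf("Accumulated young generation GC time %3.7f secs\n", young.seconds);
    printf("Accumulated old generation GC time %3.7f secs\n", old_gen.seconds);
  }
}

int main() {
  Timer young, old_gen;
  print_tracing_info(young, old_gen);
  return 0;
}
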
 
 
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/pcTasks.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -81,7 +81,6 @@
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
   ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
-  ParCompactionManager::FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
 
   switch (_root_type) {
     case universe:
@@ -117,7 +116,7 @@
       break;
 
     case class_loader_data:
-      ClassLoaderDataGraph::always_strong_oops_do(&mark_and_push_closure, &follow_klass_closure, true);
+      ClassLoaderDataGraph::always_strong_oops_do(&mark_and_push_closure, true);
       break;
 
     case code_cache:
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -196,17 +196,6 @@
     FollowStackClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
     virtual void do_void();
   };
-
-  // The one and only place to start following the classes.
-  // Should only be applied to the ClassLoaderData klasses list.
-  class FollowKlassClosure : public KlassClosure {
-   private:
-    MarkAndPushClosure* _mark_and_push_closure;
-   public:
-    FollowKlassClosure(MarkAndPushClosure* mark_and_push_closure) :
-        _mark_and_push_closure(mark_and_push_closure) { }
-    void do_klass(Klass* klass);
-  };
 };
 
 inline ParCompactionManager* ParCompactionManager::manager_array(uint index) {
--- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -98,15 +98,10 @@
   _compaction_manager->follow_marking_stacks();
 }
 
-inline void ParCompactionManager::FollowKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(_mark_and_push_closure);
-}
-
 inline void ParCompactionManager::follow_class_loader(ClassLoaderData* cld) {
   MarkAndPushClosure mark_and_push_closure(this);
-  FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
 
-  cld->oops_do(&mark_and_push_closure, &follow_klass_closure, true);
+  cld->oops_do(&mark_and_push_closure, true);
 }
 
 inline void ParCompactionManager::follow_contents(oop obj) {
--- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -173,7 +173,9 @@
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
-    if (TraceOldGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }
 
     // Let the size policy know we're starting
     size_policy->major_collection_begin();
@@ -342,7 +344,9 @@
     // We collected the heap, recalculate the metaspace capacity
     MetaspaceGC::compute_new_size();
 
-    if (TraceOldGenTime) accumulated_time()->stop();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->stop();
+    }
 
     young_gen->print_used_change(young_gen_prev_used);
     old_gen->print_used_change(old_gen_prev_used);
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -520,7 +520,7 @@
   const size_t beg_region = obj_ofs >> Log2RegionSize;
   const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
 
-  DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
+  DEBUG_ONLY(Atomic::inc(&add_obj_count);)
   DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
 
   if (beg_region == end_region) {
@@ -838,11 +838,6 @@
 
 bool PSParallelCompact::IsAliveClosure::do_object_b(oop p) { return mark_bitmap()->is_marked(p); }
 
-void PSParallelCompact::AdjustKlassClosure::do_klass(Klass* klass) {
-  PSParallelCompact::AdjustPointerClosure closure(_cm);
-  klass->oops_do(&closure);
-}
-
 void PSParallelCompact::post_initialize() {
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   MemRegion mr = heap->reserved_region();
@@ -1778,7 +1773,9 @@
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
-    if (TraceOldGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }
 
     // Let the size policy know we're starting
     size_policy->major_collection_begin();
@@ -1897,7 +1894,7 @@
     // Resize the metaspace capacity after a collection
     MetaspaceGC::compute_new_size();
 
-    if (TraceOldGenTime) {
+    if (log_is_enabled(Debug, gc, heap, exit)) {
       accumulated_time()->stop();
     }
 
@@ -2160,7 +2157,6 @@
   ClassLoaderDataGraph::clear_claimed_marks();
 
   PSParallelCompact::AdjustPointerClosure oop_closure(cm);
-  PSParallelCompact::AdjustKlassClosure klass_closure(cm);
 
   // General strong roots.
   Universe::oops_do(&oop_closure);
@@ -2170,7 +2166,7 @@
   Management::oops_do(&oop_closure);
   JvmtiExport::oops_do(&oop_closure);
   SystemDictionary::oops_do(&oop_closure);
-  ClassLoaderDataGraph::oops_do(&oop_closure, &klass_closure, true);
+  ClassLoaderDataGraph::oops_do(&oop_closure, true);
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -517,7 +517,7 @@
   OrderAccess::release();
   _blocks_filled = true;
   // Debug builds count the number of times the table was filled.
-  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
+  DEBUG_ONLY(Atomic::inc(&_blocks_filled_count));
 }
 
 inline void
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -306,7 +306,9 @@
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(false /* not full GC */,gc_cause);
 
-    if (TraceYoungGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }
 
     // Let the size policy know we're starting
     size_policy->minor_collection_begin();
@@ -607,7 +609,9 @@
       CardTableExtension::verify_all_young_refs_imprecise();
     }
 
-    if (TraceYoungGenTime) accumulated_time()->stop();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->stop();
+    }
 
     young_gen->print_used_change(pre_gc_values.young_gen_used());
     old_gen->print_used_change(pre_gc_values.old_gen_used());
--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,15 +85,15 @@
 typedef PSRootsClosure</*promote_immediately=*/false> PSScavengeRootsClosure;
 typedef PSRootsClosure</*promote_immediately=*/true> PSPromoteRootsClosure;
 
-// Scavenges a single oop in a Klass.
-class PSScavengeFromKlassClosure: public OopClosure {
+// Scavenges a single oop in a ClassLoaderData.
+class PSScavengeFromCLDClosure: public OopClosure {
  private:
   PSPromotionManager* _pm;
-  // Used to redirty a scanned klass if it has oops
+  // Used to redirty a scanned cld if it has oops
   // pointing to the young generation after being scanned.
-  Klass*             _scanned_klass;
+  ClassLoaderData*    _scanned_cld;
  public:
-  PSScavengeFromKlassClosure(PSPromotionManager* pm) : _pm(pm), _scanned_klass(NULL) { }
+  PSScavengeFromCLDClosure(PSPromotionManager* pm) : _pm(pm), _scanned_cld(NULL) { }
   void do_oop(narrowOop* p) { ShouldNotReachHere(); }
   void do_oop(oop* p)       {
     ParallelScavengeHeap* psh = ParallelScavengeHeap::heap();
@@ -111,48 +111,46 @@
       oopDesc::encode_store_heap_oop_not_null(p, new_obj);
 
       if (PSScavenge::is_obj_in_young(new_obj)) {
-        do_klass_barrier();
+        do_cld_barrier();
       }
     }
   }
 
-  void set_scanned_klass(Klass* klass) {
-    assert(_scanned_klass == NULL || klass == NULL, "Should always only handling one klass at a time");
-    _scanned_klass = klass;
+  void set_scanned_cld(ClassLoaderData* cld) {
+    assert(_scanned_cld == NULL || cld == NULL, "Should only be handling one cld at a time");
+    _scanned_cld = cld;
   }
 
  private:
-  void do_klass_barrier() {
-    assert(_scanned_klass != NULL, "Should not be called without having a scanned klass");
-    _scanned_klass->record_modified_oops();
+  void do_cld_barrier() {
+    assert(_scanned_cld != NULL, "Should not be called without having a scanned cld");
+    _scanned_cld->record_modified_oops();
   }
-
 };
 
-// Scavenges the oop in a Klass.
-class PSScavengeKlassClosure: public KlassClosure {
+// Scavenges the oop in a ClassLoaderData.
+class PSScavengeCLDClosure: public CLDClosure {
  private:
-  PSScavengeFromKlassClosure _oop_closure;
+  PSScavengeFromCLDClosure _oop_closure;
  protected:
  public:
-  PSScavengeKlassClosure(PSPromotionManager* pm) : _oop_closure(pm) { }
-  void do_klass(Klass* klass) {
-    // If the klass has not been dirtied we know that there's
+  PSScavengeCLDClosure(PSPromotionManager* pm) : _oop_closure(pm) { }
+  void do_cld(ClassLoaderData* cld) {
+    // If the cld has not been dirtied we know that there's
     // no references into  the young gen and we can skip it.
 
-    if (klass->has_modified_oops()) {
-      // Clean the klass since we're going to scavenge all the metadata.
-      klass->clear_modified_oops();
-
-      // Setup the promotion manager to redirty this klass
+    if (cld->has_modified_oops()) {
+      // Setup the promotion manager to redirty this cld
       // if references are left in the young gen.
-      _oop_closure.set_scanned_klass(klass);
+      _oop_closure.set_scanned_cld(cld);
 
-      klass->oops_do(&_oop_closure);
+      // Clean the cld since we're going to scavenge all the metadata.
+      cld->oops_do(&_oop_closure, false, /*clear_modified_oops*/true);
 
-      _oop_closure.set_scanned_klass(NULL);
+      _oop_closure.set_scanned_cld(NULL);
     }
   }
 };
 
+
 #endif // SHARE_VM_GC_PARALLEL_PSSCAVENGE_INLINE_HPP
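
The closures above carry the central idea of this change: scavenging no longer walks the oops of each Klass and tracks dirtiness per Klass, but visits each ClassLoaderData, clears its modified-oops state as part of the scan, and re-dirties the scanned CLD if references into the young generation remain. A simplified standalone model of that dirty-check/scan/redirty cycle (ClassLoaderData, CLDClosure, and has_modified_oops are HotSpot-internal; the types below are stand-ins):

#include <cstdio>
#include <vector>

// Simplified stand-ins for ClassLoaderData and an oop-visiting closure.
struct FakeCLD {
  std::vector<int*> oops;       // pretend embedded oops
  bool modified = true;         // dirty flag, like has_modified_oops()
  void clear_modified() { modified = false; }
};

struct ScanState {
  FakeCLD* scanned = nullptr;   // like set_scanned_cld()
  void visit(FakeCLD* cld) {
    if (!cld->modified) return;           // skip clean CLDs entirely
    scanned = cld;
    cld->clear_modified();                // clean while scanning all the metadata
    for (int* p : cld->oops) {
      if (p && *p < 0) cld->modified = true;  // "points into young gen": redirty
    }
    scanned = nullptr;
  }
};

int main() {
  int young = -1;
  FakeCLD cld;
  cld.oops.push_back(&young);
  ScanState s;
  s.visit(&cld);
  printf("redirtied: %s\n", cld.modified ? "yes" : "no");
  return 0;
}
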
--- a/src/hotspot/share/gc/parallel/psTasks.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/parallel/psTasks.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -79,8 +79,8 @@
 
     case class_loader_data:
     {
-      PSScavengeKlassClosure klass_closure(pm);
-      ClassLoaderDataGraph::oops_do(&roots_closure, &klass_closure, false);
+      PSScavengeCLDClosure cld_closure(pm);
+      ClassLoaderDataGraph::cld_do(&cld_closure);
     }
     break;
 
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -121,7 +121,7 @@
 }
 
 ScanClosure::ScanClosure(DefNewGeneration* g, bool gc_barrier) :
-    OopsInKlassOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
+    OopsInClassLoaderDataOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
 {
   _boundary = _g->reserved().end();
 }
@@ -130,7 +130,7 @@
 void ScanClosure::do_oop(narrowOop* p) { ScanClosure::do_oop_work(p); }
 
 FastScanClosure::FastScanClosure(DefNewGeneration* g, bool gc_barrier) :
-    OopsInKlassOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
+    OopsInClassLoaderDataOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
 {
   _boundary = _g->reserved().end();
 }
@@ -138,30 +138,28 @@
 void FastScanClosure::do_oop(oop* p)       { FastScanClosure::do_oop_work(p); }
 void FastScanClosure::do_oop(narrowOop* p) { FastScanClosure::do_oop_work(p); }
 
-void KlassScanClosure::do_klass(Klass* klass) {
+void CLDScanClosure::do_cld(ClassLoaderData* cld) {
   NOT_PRODUCT(ResourceMark rm);
-  log_develop_trace(gc, scavenge)("KlassScanClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
-                                  p2i(klass),
-                                  klass->external_name(),
-                                  klass->has_modified_oops() ? "true" : "false");
+  log_develop_trace(gc, scavenge)("CLDScanClosure::do_cld " PTR_FORMAT ", %s, dirty: %s",
+                                  p2i(cld),
+                                  cld->loader_name(),
+                                  cld->has_modified_oops() ? "true" : "false");
 
-  // If the klass has not been dirtied we know that there's
+  // If the cld has not been dirtied we know that there's
   // no references into  the young gen and we can skip it.
-  if (klass->has_modified_oops()) {
+  if (cld->has_modified_oops()) {
     if (_accumulate_modified_oops) {
-      klass->accumulate_modified_oops();
+      cld->accumulate_modified_oops();
     }
 
-    // Clear this state since we're going to scavenge all the metadata.
-    klass->clear_modified_oops();
-
-    // Tell the closure which Klass is being scanned so that it can be dirtied
+    // Tell the closure which CLD is being scanned so that it can be dirtied
     // if oops are left pointing into the young gen.
-    _scavenge_closure->set_scanned_klass(klass);
+    _scavenge_closure->set_scanned_cld(cld);
 
-    klass->oops_do(_scavenge_closure);
+    // Clean the cld since we're going to scavenge all the metadata.
+    cld->oops_do(_scavenge_closure, false, /*clear_modified_oops*/true);
 
-    _scavenge_closure->set_scanned_klass(NULL);
+    _scavenge_closure->set_scanned_cld(NULL);
   }
 }
 
@@ -177,12 +175,6 @@
 void FilteringClosure::do_oop(oop* p)       { FilteringClosure::do_oop_work(p); }
 void FilteringClosure::do_oop(narrowOop* p) { FilteringClosure::do_oop_work(p); }
 
-KlassScanClosure::KlassScanClosure(OopsInKlassOrGenClosure* scavenge_closure,
-                                   KlassRemSet* klass_rem_set)
-    : _scavenge_closure(scavenge_closure),
-      _accumulate_modified_oops(klass_rem_set->accumulate_modified_oops()) {}
-
-
 DefNewGeneration::DefNewGeneration(ReservedSpace rs,
                                    size_t initial_size,
                                    const char* policy)
@@ -629,11 +621,8 @@
   FastScanClosure fsc_with_no_gc_barrier(this, false);
   FastScanClosure fsc_with_gc_barrier(this, true);
 
-  KlassScanClosure klass_scan_closure(&fsc_with_no_gc_barrier,
-                                      gch->rem_set()->klass_rem_set());
-  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
-                                           &fsc_with_no_gc_barrier,
-                                           false);
+  CLDScanClosure cld_scan_closure(&fsc_with_no_gc_barrier,
+                                  gch->rem_set()->cld_rem_set()->accumulate_modified_oops());
 
   set_promo_failure_scan_stack_closure(&fsc_with_no_gc_barrier);
   FastEvacuateFollowersClosure evacuate_followers(gch,
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,10 +30,10 @@
 #include "runtime/orderAccess.inline.hpp"
 
 template <class T> inline void CardTableModRefBS::inline_write_ref_field(T* field, oop newVal, bool release) {
-  jbyte* byte = byte_for((void*)field);
+  volatile jbyte* byte = byte_for((void*)field);
   if (release) {
     // Perform a releasing store if requested.
-    OrderAccess::release_store((volatile jbyte*) byte, dirty_card);
+    OrderAccess::release_store(byte, jbyte(dirty_card));
   } else {
     *byte = dirty_card;
   }
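
The barrier above now types the card pointer as volatile jbyte* and, in the releasing variant, stores the dirty value with release ordering so the preceding reference store becomes visible before the card is marked. A standalone sketch of the same idea using std::atomic; the card size and card values are illustrative:

#include <atomic>
#include <cstdint>
#include <cstdio>

// Toy card table: one byte per 512-byte "card" of a fake heap.
static const int kCardShift = 9;
static std::atomic<uint8_t> card_table[64];
static const uint8_t kDirty = 0;   // illustrative dirty value

static void write_ref_field(uintptr_t field_addr, bool release) {
  std::atomic<uint8_t>& card = card_table[field_addr >> kCardShift];
  if (release) {
    // Publish the preceding oop store before marking the card dirty.
    card.store(kDirty, std::memory_order_release);
  } else {
    card.store(kDirty, std::memory_order_relaxed);
  }
}

int main() {
  write_ref_field(0x1234, /*release=*/true);
  printf("card %zu dirtied\n", (size_t)(0x1234 >> kCardShift));
  return 0;
}
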
--- a/src/hotspot/share/gc/shared/cardTableRS.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableRS.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -34,16 +34,16 @@
 #include "runtime/os.hpp"
 #include "utilities/macros.hpp"
 
-class HasAccumulatedModifiedOopsClosure : public KlassClosure {
+class HasAccumulatedModifiedOopsClosure : public CLDClosure {
   bool _found;
  public:
   HasAccumulatedModifiedOopsClosure() : _found(false) {}
-  void do_klass(Klass* klass) {
+  void do_cld(ClassLoaderData* cld) {
     if (_found) {
       return;
     }
 
-    if (klass->has_accumulated_modified_oops()) {
+    if (cld->has_accumulated_modified_oops()) {
       _found = true;
     }
   }
@@ -52,28 +52,29 @@
   }
 };
 
-bool KlassRemSet::mod_union_is_clear() {
+bool CLDRemSet::mod_union_is_clear() {
   HasAccumulatedModifiedOopsClosure closure;
-  ClassLoaderDataGraph::classes_do(&closure);
+  ClassLoaderDataGraph::cld_do(&closure);
 
   return !closure.found();
 }
 
 
-class ClearKlassModUnionClosure : public KlassClosure {
+class ClearCLDModUnionClosure : public CLDClosure {
  public:
-  void do_klass(Klass* klass) {
-    if (klass->has_accumulated_modified_oops()) {
-      klass->clear_accumulated_modified_oops();
+  void do_cld(ClassLoaderData* cld) {
+    if (cld->has_accumulated_modified_oops()) {
+      cld->clear_accumulated_modified_oops();
     }
   }
 };
 
-void KlassRemSet::clear_mod_union() {
-  ClearKlassModUnionClosure closure;
-  ClassLoaderDataGraph::classes_do(&closure);
+void CLDRemSet::clear_mod_union() {
+  ClearCLDModUnionClosure closure;
+  ClassLoaderDataGraph::cld_do(&closure);
 }
 
+
 CardTableRS::CardTableRS(MemRegion whole_heap) :
   _bs(NULL),
   _cur_youngergen_card_val(youngergenP1_card)
--- a/src/hotspot/share/gc/shared/cardTableRS.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/cardTableRS.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,11 +31,11 @@
 class Space;
 class OopsInGenClosure;
 
-// Helper to remember modified oops in all klasses.
-class KlassRemSet {
+// Helper to remember modified oops in all clds.
+class CLDRemSet {
   bool _accumulate_modified_oops;
  public:
-  KlassRemSet() : _accumulate_modified_oops(false) {}
+  CLDRemSet() : _accumulate_modified_oops(false) {}
   void set_accumulate_modified_oops(bool value) { _accumulate_modified_oops = value; }
   bool accumulate_modified_oops() { return _accumulate_modified_oops; }
   bool mod_union_is_clear();
@@ -64,7 +64,7 @@
     return CardTableModRefBSForCTRS::card_is_dirty_wrt_gen_iter(cv);
   }
 
-  KlassRemSet _klass_rem_set;
+  CLDRemSet _cld_rem_set;
   BarrierSet* _bs;
 
   CardTableModRefBSForCTRS* _ct_bs;
@@ -121,7 +121,7 @@
   // Set the barrier set.
   void set_bs(BarrierSet* bs) { _bs = bs; }
 
-  KlassRemSet* klass_rem_set() { return &_klass_rem_set; }
+  CLDRemSet* cld_rem_set() { return &_cld_rem_set; }
 
   CardTableModRefBSForCTRS* ct_bs() { return _ct_bs; }
 
--- a/src/hotspot/share/gc/shared/genCollectedHeap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/genCollectedHeap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1157,11 +1157,10 @@
 }
 
 void GenCollectedHeap::print_tracing_info() const {
-  if (TraceYoungGenTime) {
-    _young_gen->print_summary_info();
-  }
-  if (TraceOldGenTime) {
-    _old_gen->print_summary_info();
+  if (log_is_enabled(Debug, gc, heap, exit)) {
+    LogStreamHandle(Debug, gc, heap, exit) lsh;
+    _young_gen->print_summary_info_on(&lsh);
+    _old_gen->print_summary_info_on(&lsh);
   }
 }
 
--- a/src/hotspot/share/gc/shared/genOopClosures.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/genOopClosures.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -81,24 +81,25 @@
 
 };
 
-// Super class for scan closures. It contains code to dirty scanned Klasses.
-class OopsInKlassOrGenClosure: public OopsInGenClosure {
-  Klass* _scanned_klass;
+// Super class for scan closures. It contains code to dirty scanned class loader data.
+class OopsInClassLoaderDataOrGenClosure: public OopsInGenClosure {
+  ClassLoaderData* _scanned_cld;
  public:
-  OopsInKlassOrGenClosure(Generation* g) : OopsInGenClosure(g), _scanned_klass(NULL) {}
-  void set_scanned_klass(Klass* k) {
-    assert(k == NULL || _scanned_klass == NULL, "Must be");
-    _scanned_klass = k;
+  OopsInClassLoaderDataOrGenClosure(Generation* g) : OopsInGenClosure(g), _scanned_cld(NULL) {}
+  void set_scanned_cld(ClassLoaderData* cld) {
+    assert(cld == NULL || _scanned_cld == NULL, "Must be");
+    _scanned_cld = cld;
   }
-  bool is_scanning_a_klass() { return _scanned_klass != NULL; }
-  void do_klass_barrier();
+  bool is_scanning_a_cld() { return _scanned_cld != NULL; }
+  void do_cld_barrier();
 };
 
+
 // Closure for scanning DefNewGeneration.
 //
 // This closure will perform barrier store calls for ALL
 // pointers in scanned oops.
-class ScanClosure: public OopsInKlassOrGenClosure {
+class ScanClosure: public OopsInClassLoaderDataOrGenClosure {
  protected:
   DefNewGeneration* _g;
   HeapWord*         _boundary;
@@ -117,7 +118,7 @@
 // This closure only performs barrier store calls on
 // pointers into the DefNewGeneration. This is less
 // precise, but faster, than a ScanClosure
-class FastScanClosure: public OopsInKlassOrGenClosure {
+class FastScanClosure: public OopsInClassLoaderDataOrGenClosure {
  protected:
   DefNewGeneration* _g;
   HeapWord*         _boundary;
@@ -131,14 +132,15 @@
   inline void do_oop_nv(narrowOop* p);
 };
 
-class KlassScanClosure: public KlassClosure {
-  OopsInKlassOrGenClosure* _scavenge_closure;
+class CLDScanClosure: public CLDClosure {
+  OopsInClassLoaderDataOrGenClosure*   _scavenge_closure;
   // true if the the modified oops state should be saved.
-  bool                     _accumulate_modified_oops;
+  bool                                 _accumulate_modified_oops;
  public:
-  KlassScanClosure(OopsInKlassOrGenClosure* scavenge_closure,
-                   KlassRemSet* klass_rem_set_policy);
-  void do_klass(Klass* k);
+  CLDScanClosure(OopsInClassLoaderDataOrGenClosure* scavenge_closure,
+                 bool accumulate_modified_oops) :
+       _scavenge_closure(scavenge_closure), _accumulate_modified_oops(accumulate_modified_oops) {}
+  void do_cld(ClassLoaderData* cld);
 };
 
 class FilteringClosure: public ExtendedOopClosure {
--- a/src/hotspot/share/gc/shared/genOopClosures.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/genOopClosures.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,9 +68,11 @@
   }
 }
 
-inline void OopsInKlassOrGenClosure::do_klass_barrier() {
-  assert(_scanned_klass != NULL, "Must be");
-  _scanned_klass->record_modified_oops();
+inline void OopsInClassLoaderDataOrGenClosure::do_cld_barrier() {
+  assert(_scanned_cld != NULL, "Must be");
+  if (!_scanned_cld->has_modified_oops()) {
+    _scanned_cld->record_modified_oops();
+  }
 }
 
 // NOTE! Any changes made here should also be made
@@ -87,8 +89,8 @@
       oopDesc::encode_store_heap_oop_not_null(p, new_obj);
     }
 
-    if (is_scanning_a_klass()) {
-      do_klass_barrier();
+    if (is_scanning_a_cld()) {
+      do_cld_barrier();
     } else if (_gc_barrier) {
       // Now call parent closure
       do_barrier(p);
@@ -111,8 +113,8 @@
       oop new_obj = obj->is_forwarded() ? obj->forwardee()
                                         : _g->copy_to_survivor_space(obj);
       oopDesc::encode_store_heap_oop_not_null(p, new_obj);
-      if (is_scanning_a_klass()) {
-        do_klass_barrier();
+      if (is_scanning_a_cld()) {
+        do_cld_barrier();
       } else if (_gc_barrier) {
         // Now call parent closure
         do_barrier(p);
--- a/src/hotspot/share/gc/shared/generation.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/generation.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -94,22 +94,14 @@
               p2i(_virtual_space.high_boundary()));
 }
 
-void Generation::print_summary_info() { print_summary_info_on(tty); }
-
 void Generation::print_summary_info_on(outputStream* st) {
   StatRecord* sr = stat_record();
   double time = sr->accumulated_time.seconds();
-  // I didn't want to change the logging when removing the level concept,
-  // but I guess this logging could say young/old or something instead of 0/1.
-  uint level;
-  if (GenCollectedHeap::heap()->is_young_gen(this)) {
-    level = 0;
-  } else {
-    level = 1;
-  }
-  st->print_cr("[Accumulated GC generation %d time %3.7f secs, "
-               "%u GC's, avg GC time %3.7f]",
-               level, time, sr->invocations,
+  st->print_cr("Accumulated %s generation GC time %3.7f secs, "
+               "%u GC's, avg GC time %3.7f",
+               GenCollectedHeap::heap()->is_young_gen(this) ? "young" : "old" ,
+               time,
+               sr->invocations,
                sr->invocations > 0 ? time / sr->invocations : 0.0);
 }
 
--- a/src/hotspot/share/gc/shared/generation.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/generation.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -549,7 +549,6 @@
 public:
   StatRecord* stat_record() { return &_stat_record; }
 
-  virtual void print_summary_info();
   virtual void print_summary_info_on(outputStream* st);
 
   // Performance Counter support
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -272,7 +272,7 @@
 
 double ReferenceProcessorPhaseTimes::ref_proc_time_ms(ReferenceType ref_type) const {
   ASSERT_REF_TYPE(ref_type);
-  return _par_phase_time_ms[ref_type_2_index(ref_type)];
+  return _ref_proc_time_ms[ref_type_2_index(ref_type)];
 }
 
 void ReferenceProcessorPhaseTimes::set_ref_proc_time_ms(ReferenceType ref_type,
--- a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -412,6 +412,7 @@
       } else if (strcmp(vmField.typeString, "address") == 0 ||
                  strcmp(vmField.typeString, "intptr_t") == 0 ||
                  strcmp(vmField.typeString, "uintptr_t") == 0 ||
+                 strcmp(vmField.typeString, "OopHandle") == 0 ||
                  strcmp(vmField.typeString, "size_t") == 0 ||
                  // All foo* types are addresses.
                  vmField.typeString[strlen(vmField.typeString) - 1] == '*') {
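
Accepting "OopHandle" as an address-like field type here (and switching Klass::_java_mirror to OopHandle in vmStructs_jvmci.cpp below) reflects that an OopHandle is a pointer to a GC-managed root slot rather than a raw oop. A standalone model of that extra level of indirection, assuming nothing about the real oopHandle.hpp layout:

#include <cstdio>

struct FakeOop { int payload; };

// One extra indirection: the handle stores the address of a root slot that
// the GC may update, so readers resolve through it instead of caching the oop.
struct FakeOopHandle {
  FakeOop** slot;
  FakeOop* resolve() const { return slot ? *slot : nullptr; }
};

int main() {
  FakeOop mirror{42};
  FakeOop* root_slot = &mirror;        // GC-visible root slot
  FakeOopHandle h{&root_slot};
  printf("%d\n", h.resolve()->payload); // 42, even if the slot is later updated
  return 0;
}
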
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,7 @@
 #include "jvmci/vmStructs_compiler_runtime.hpp"
 #include "jvmci/vmStructs_jvmci.hpp"
 #include "oops/oop.hpp"
+#include "oops/oopHandle.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -192,7 +193,7 @@
   nonstatic_field(Klass,                       _name,                                         Symbol*)                               \
   nonstatic_field(Klass,                       _prototype_header,                             markOop)                               \
   nonstatic_field(Klass,                       _next_sibling,                                 Klass*)                                \
-  nonstatic_field(Klass,                       _java_mirror,                                  oop)                                   \
+  nonstatic_field(Klass,                       _java_mirror,                                  OopHandle)                             \
   nonstatic_field(Klass,                       _modifier_flags,                               jint)                                  \
   nonstatic_field(Klass,                       _access_flags,                                 AccessFlags)                           \
                                                                                                                                      \
@@ -761,6 +762,14 @@
   declare_constant(VM_Version::ISA_XMONT)               \
   declare_constant(VM_Version::ISA_PAUSE_NSEC)          \
   declare_constant(VM_Version::ISA_VAMASK)              \
+  declare_constant(VM_Version::ISA_SPARC6)              \
+  declare_constant(VM_Version::ISA_DICTUNP)             \
+  declare_constant(VM_Version::ISA_FPCMPSHL)            \
+  declare_constant(VM_Version::ISA_RLE)                 \
+  declare_constant(VM_Version::ISA_SHA3)                \
+  declare_constant(VM_Version::ISA_VIS3C)               \
+  declare_constant(VM_Version::ISA_SPARC5B)             \
+  declare_constant(VM_Version::ISA_MME)                 \
   declare_constant(VM_Version::CPU_FAST_IDIV)           \
   declare_constant(VM_Version::CPU_FAST_RDPC)           \
   declare_constant(VM_Version::CPU_FAST_BIS)            \
--- a/src/hotspot/share/memory/allocation.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/allocation.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -233,7 +233,6 @@
   void print_address_on(outputStream* st) const;  // nonvirtual address printing
 
 #define METASPACE_OBJ_TYPES_DO(f) \
-  f(Unknown) \
   f(Class) \
   f(Symbol) \
   f(TypeArrayU1) \
--- a/src/hotspot/share/memory/filemap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/filemap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -182,6 +182,7 @@
   _obj_alignment = ObjectAlignmentInBytes;
   _compact_strings = CompactStrings;
   _narrow_oop_mode = Universe::narrow_oop_mode();
+  _narrow_oop_base = Universe::narrow_oop_base();
   _narrow_oop_shift = Universe::narrow_oop_shift();
   _max_heap_size = MaxHeapSize;
   _narrow_klass_base = Universe::narrow_klass_base();
@@ -687,8 +688,14 @@
 // open archive objects.
 void FileMapInfo::map_heap_regions() {
   if (MetaspaceShared::is_heap_object_archiving_allowed()) {
+      log_info(cds)("Archived narrow_oop_mode = %d, narrow_oop_base = " PTR_FORMAT ", narrow_oop_shift = %d",
+                    narrow_oop_mode(), p2i(narrow_oop_base()), narrow_oop_shift());
+      log_info(cds)("Archived narrow_klass_base = " PTR_FORMAT ", narrow_klass_shift = %d",
+                    p2i(narrow_klass_base()), narrow_klass_shift());
+
     // Check that all the narrow oop and klass encodings match the archive
     if (narrow_oop_mode() != Universe::narrow_oop_mode() ||
+        narrow_oop_base() != Universe::narrow_oop_base() ||
         narrow_oop_shift() != Universe::narrow_oop_shift() ||
         narrow_klass_base() != Universe::narrow_klass_base() ||
         narrow_klass_shift() != Universe::narrow_klass_shift()) {
@@ -697,6 +704,11 @@
                       "The current CompressedOops/CompressedClassPointers encoding differs from "
                       "that archived due to heap size change. The archive was dumped using max heap "
                       "size " UINTX_FORMAT "M.", max_heap_size()/M);
+        log_info(cds)("Current narrow_oop_mode = %d, narrow_oop_base = " PTR_FORMAT ", narrow_oop_shift = %d",
+                      Universe::narrow_oop_mode(), p2i(Universe::narrow_oop_base()),
+                      Universe::narrow_oop_shift());
+        log_info(cds)("Current narrow_klass_base = " PTR_FORMAT ", narrow_klass_shift = %d",
+                      p2i(Universe::narrow_klass_base()), Universe::narrow_klass_shift());
       }
     } else {
       // First, map string regions as closed archive heap regions.
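
map_heap_regions() now also records and compares the archived narrow oop base and logs both the archived and the current encoding parameters when they disagree. A standalone sketch of that compare-everything-or-skip pattern; the field names below are illustrative and do not reproduce the FileMapHeader layout:

#include <cstdint>
#include <cstdio>

// Simplified model of the dump-time vs. run-time consistency check: if any
// compressed oop/klass encoding parameter differs from what was archived,
// the cached heap regions are skipped instead of being mapped.
struct Encoding {
  int      oop_mode;
  uint64_t oop_base;
  int      oop_shift;
  uint64_t klass_base;
  int      klass_shift;
};

static bool encodings_match(const Encoding& archived, const Encoding& current) {
  return archived.oop_mode    == current.oop_mode   &&
         archived.oop_base    == current.oop_base   &&
         archived.oop_shift   == current.oop_shift  &&
         archived.klass_base  == current.klass_base &&
         archived.klass_shift == current.klass_shift;
}

int main() {
  Encoding archived{1, 0, 3, 0x800000000ULL, 3};
  Encoding current {1, 0, 0, 0x800000000ULL, 3};  // oop shift changed, e.g. after a heap size change
  if (!encodings_match(archived, current)) {
    printf("archived heap regions not mapped\n");
  }
  return 0;
}
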
--- a/src/hotspot/share/memory/filemap.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/filemap.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -112,6 +112,7 @@
     int    _version;                  // (from enum, above.)
     size_t _alignment;                // how shared archive should be aligned
     int    _obj_alignment;            // value of ObjectAlignmentInBytes
+    address _narrow_oop_base;         // compressed oop encoding base
     int    _narrow_oop_shift;         // compressed oop encoding shift
     bool   _compact_strings;          // value of CompactStrings
     uintx  _max_heap_size;            // java max heap size during dumping
@@ -203,12 +204,13 @@
   int    version()                    { return _header->_version; }
   size_t alignment()                  { return _header->_alignment; }
   Universe::NARROW_OOP_MODE narrow_oop_mode() { return _header->_narrow_oop_mode; }
-  int    narrow_oop_shift()           { return _header->_narrow_oop_shift; }
-  uintx  max_heap_size()              { return _header->_max_heap_size; }
-  address narrow_klass_base() const   { return _header->_narrow_klass_base; }
+  address narrow_oop_base()    const  { return _header->_narrow_oop_base; }
+  int     narrow_oop_shift()   const  { return _header->_narrow_oop_shift; }
+  uintx   max_heap_size()      const  { return _header->_max_heap_size; }
+  address narrow_klass_base()  const  { return _header->_narrow_klass_base; }
   int     narrow_klass_shift() const  { return _header->_narrow_klass_shift; }
-  struct FileMapHeader* header()      { return _header; }
-  char* misc_data_patching_start()            { return _header->_misc_data_patching_start; }
+  struct  FileMapHeader* header()     { return _header; }
+  char*   misc_data_patching_start()          { return _header->_misc_data_patching_start; }
   void set_misc_data_patching_start(char* p)  { _header->_misc_data_patching_start = p; }
   char* read_only_tables_start()              { return _header->_read_only_tables_start; }
   void set_read_only_tables_start(char* p)    { _header->_read_only_tables_start = p; }
--- a/src/hotspot/share/memory/iterator.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/iterator.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,17 +29,8 @@
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-void KlassToOopClosure::do_klass(Klass* k) {
-  assert(_oop_closure != NULL, "Not initialized?");
-  k->oops_do(_oop_closure);
-}
-
 void CLDToOopClosure::do_cld(ClassLoaderData* cld) {
-  cld->oops_do(_oop_closure, &_klass_closure, _must_claim_cld);
-}
-
-void CLDToKlassAndOopClosure::do_cld(ClassLoaderData* cld) {
-  cld->oops_do(_oop_closure, _klass_closure, _must_claim_cld);
+  cld->oops_do(_oop_closure, _must_claim_cld);
 }
 
 void ObjectToOopClosure::do_object(oop obj) {
--- a/src/hotspot/share/memory/iterator.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/iterator.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -138,67 +138,27 @@
   virtual void do_cld(ClassLoaderData* cld) = 0;
 };
 
-class KlassToOopClosure : public KlassClosure {
-  friend class MetadataAwareOopClosure;
-  friend class MetadataAwareOopsInGenClosure;
-
-  OopClosure* _oop_closure;
-
-  // Used when _oop_closure couldn't be set in an initialization list.
-  void initialize(OopClosure* oop_closure) {
-    assert(_oop_closure == NULL, "Should only be called once");
-    _oop_closure = oop_closure;
-  }
-
- public:
-  KlassToOopClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) {}
-
-  virtual void do_klass(Klass* k);
-};
 
 class CLDToOopClosure : public CLDClosure {
   OopClosure*       _oop_closure;
-  KlassToOopClosure _klass_closure;
   bool              _must_claim_cld;
 
  public:
   CLDToOopClosure(OopClosure* oop_closure, bool must_claim_cld = true) :
       _oop_closure(oop_closure),
-      _klass_closure(oop_closure),
       _must_claim_cld(must_claim_cld) {}
 
   void do_cld(ClassLoaderData* cld);
 };
 
-class CLDToKlassAndOopClosure : public CLDClosure {
-  friend class G1CollectedHeap;
- protected:
-  OopClosure*   _oop_closure;
-  KlassClosure* _klass_closure;
-  bool          _must_claim_cld;
- public:
-  CLDToKlassAndOopClosure(KlassClosure* klass_closure,
-                          OopClosure* oop_closure,
-                          bool must_claim_cld) :
-                              _oop_closure(oop_closure),
-                              _klass_closure(klass_closure),
-                              _must_claim_cld(must_claim_cld) {}
-  void do_cld(ClassLoaderData* cld);
-};
-
 // The base class for all concurrent marking closures,
 // that participates in class unloading.
 // It's used to proxy through the metadata to the oops defined in them.
 class MetadataAwareOopClosure: public ExtendedOopClosure {
-  KlassToOopClosure _klass_closure;
 
  public:
-  MetadataAwareOopClosure() : ExtendedOopClosure() {
-    _klass_closure.initialize(this);
-  }
-  MetadataAwareOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
-    _klass_closure.initialize(this);
-  }
+  MetadataAwareOopClosure() : ExtendedOopClosure() { }
+  MetadataAwareOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) { }
 
   bool do_metadata_nv()      { return true; }
   virtual bool do_metadata() { return do_metadata_nv(); }
--- a/src/hotspot/share/memory/iterator.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/iterator.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,10 +37,8 @@
 #include "utilities/debug.hpp"
 
 inline void MetadataAwareOopClosure::do_cld_nv(ClassLoaderData* cld) {
-  assert(_klass_closure._oop_closure == this, "Must be");
-
   bool claim = true;  // Must claim the class loader data before processing.
-  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
+  cld->oops_do(this, claim);
 }
 
 inline void MetadataAwareOopClosure::do_klass_nv(Klass* k) {
--- a/src/hotspot/share/memory/metaspace.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/metaspace.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1291,7 +1291,7 @@
 }
 
 size_t VirtualSpaceList::free_bytes() {
-  return virtual_space_list()->free_words_in_vs() * BytesPerWord;
+  return current_virtual_space()->free_words_in_vs() * BytesPerWord;
 }
 
 // Allocate another meta virtual space and add it to the list.
@@ -2718,7 +2718,7 @@
 
 
 size_t MetaspaceAux::_capacity_words[] = {0, 0};
-size_t MetaspaceAux::_used_words[] = {0, 0};
+volatile size_t MetaspaceAux::_used_words[] = {0, 0};
 
 size_t MetaspaceAux::free_bytes(Metaspace::MetadataType mdtype) {
   VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
@@ -3103,10 +3103,16 @@
 
   Universe::set_narrow_klass_base(lower_base);
 
-  if ((uint64_t)(higher_address - lower_base) <= UnscaledClassSpaceMax) {
+  // CDS uses LogKlassAlignmentInBytes for narrow_klass_shift. See
+  // MetaspaceShared::initialize_dumptime_shared_and_meta_spaces() for
+  // how dump time narrow_klass_shift is set. Although, CDS can work
+  // with zero-shift mode also, to be consistent with AOT it uses
+  // LogKlassAlignmentInBytes for klass shift so archived java heap objects
+  // can be used at same time as AOT code.
+  if (!UseSharedSpaces
+      && (uint64_t)(higher_address - lower_base) <= UnscaledClassSpaceMax) {
     Universe::set_narrow_klass_shift(0);
   } else {
-    assert(!UseSharedSpaces, "Cannot shift with UseSharedSpaces");
     Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
   }
   AOTLoader::set_narrow_klass_shift();
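
The change above keeps narrow_klass_shift at LogKlassAlignmentInBytes whenever shared spaces are in use, instead of dropping to a zero shift, so the runtime decode matches what CDS and AOT assumed at dump time. The decode itself is just base + (narrow << shift); a standalone illustration with made-up values, assuming LogKlassAlignmentInBytes is 3:

#include <cstdint>
#include <cstdio>

// decode(narrow) = base + ((uintptr_t)narrow << shift); with shift = 3 a
// 32-bit narrow value can span 32 GB of class space above the base, whereas
// shift = 0 only covers 4 GB.
static uint64_t decode_klass(uint32_t narrow, uint64_t base, int shift) {
  return base + ((uint64_t)narrow << shift);
}

int main() {
  const uint64_t base  = 0x800000000ULL;  // illustrative narrow_klass_base
  const int      shift = 3;               // illustrative LogKlassAlignmentInBytes
  printf("%#llx\n", (unsigned long long)decode_klass(0x10, base, shift));
  return 0;
}
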
@@ -3325,50 +3331,25 @@
 
 #if INCLUDE_CDS
   if (DumpSharedSpaces) {
-    MetaspaceShared::initialize_shared_rs();
+    MetaspaceShared::initialize_dumptime_shared_and_meta_spaces();
   } else if (UseSharedSpaces) {
-    // If using shared space, open the file that contains the shared space
-    // and map in the memory before initializing the rest of metaspace (so
-    // the addresses don't conflict)
-    address cds_address = NULL;
-    FileMapInfo* mapinfo = new FileMapInfo();
-
-    // Open the shared archive file, read and validate the header. If
-    // initialization fails, shared spaces [UseSharedSpaces] are
-    // disabled and the file is closed.
-    // Map in spaces now also
-    if (mapinfo->initialize() && MetaspaceShared::map_shared_spaces(mapinfo)) {
-      size_t cds_total = MetaspaceShared::core_spaces_size();
-      cds_address = (address)mapinfo->header()->region_addr(0);
+    // If any of the archived space fails to map, UseSharedSpaces
+    // is reset to false. Fall through to the
+    // (!DumpSharedSpaces && !UseSharedSpaces) case to set up class
+    // metaspace.
+    MetaspaceShared::initialize_runtime_shared_and_meta_spaces();
+  }
+
+  if (!DumpSharedSpaces && !UseSharedSpaces)
+#endif // INCLUDE_CDS
+  {
 #ifdef _LP64
-      if (using_class_space()) {
-        char* cds_end = (char*)(cds_address + cds_total);
-        cds_end = (char *)align_up(cds_end, _reserve_alignment);
-        // If UseCompressedClassPointers is set then allocate the metaspace area
-        // above the heap and above the CDS area (if it exists).
-        allocate_metaspace_compressed_klass_ptrs(cds_end, cds_address);
-        // map_heap_regions() compares the current narrow oop and klass encodings
-        // with the archived ones, so it must be done after all encodings are determined.
-        mapinfo->map_heap_regions();
-      }
-#endif // _LP64
-    } else {
-      assert(!mapinfo->is_open() && !UseSharedSpaces,
-             "archive file not closed or shared spaces not disabled.");
-    }
-  }
-#endif // INCLUDE_CDS
-
-#ifdef _LP64
-  if (!UseSharedSpaces && using_class_space()) {
-    if (DumpSharedSpaces) {
-      // Already initialized inside MetaspaceShared::initialize_shared_rs()
-    } else {
+    if (using_class_space()) {
       char* base = (char*)align_up(Universe::heap()->reserved_region().end(), _reserve_alignment);
       allocate_metaspace_compressed_klass_ptrs(base, 0);
     }
+#endif // _LP64
   }
-#endif // _LP64
 
   // Initialize these before initializing the VirtualSpaceList
   _first_chunk_word_size = InitialBootClassLoaderMetaspaceSize / BytesPerWord;
--- a/src/hotspot/share/memory/metaspace.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/metaspace.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -179,6 +179,10 @@
     assert(DumpSharedSpaces, "sanity");
     DEBUG_ONLY(_frozen = true;)
   }
+#ifdef _LP64
+  static void allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base);
+#endif
+
  private:
 
 #ifdef _LP64
@@ -187,8 +191,6 @@
   // Returns true if can use CDS with metaspace allocated as specified address.
   static bool can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base);
 
-  static void allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base);
-
   static void initialize_class_space(ReservedSpace rs);
 #endif
   size_t class_chunk_size(size_t word_size);
@@ -273,7 +275,7 @@
   // Running sum of space in all Metachunks that
   // are being used for metadata. One for each
   // type of Metadata.
-  static size_t _used_words[Metaspace:: MetadataTypeCount];
+  static volatile size_t _used_words[Metaspace:: MetadataTypeCount];
 
  public:
   // Decrement and increment _allocated_capacity_words
--- a/src/hotspot/share/memory/metaspaceShared.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/metaspaceShared.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -165,7 +165,7 @@
   }
 
   void print(size_t total_bytes) const {
-    tty->print_cr("%s space: " SIZE_FORMAT_W(9) " [ %4.1f%% of total] out of " SIZE_FORMAT_W(9) " bytes [%5.1f%% used] at " INTPTR_FORMAT,
+    tty->print_cr("%-3s space: " SIZE_FORMAT_W(9) " [ %4.1f%% of total] out of " SIZE_FORMAT_W(9) " bytes [%5.1f%% used] at " INTPTR_FORMAT,
                   _name, used(), perc(used(), total_bytes), reserved(), perc(used(), reserved()), p2i(_base));
   }
   void print_out_of_space_msg(const char* failing_region, size_t needed_bytes) {
@@ -214,7 +214,42 @@
   return _ro_region.allocate(num_bytes);
 }
 
-void MetaspaceShared::initialize_shared_rs() {
+void MetaspaceShared::initialize_runtime_shared_and_meta_spaces() {
+  assert(UseSharedSpaces, "Must be called when UseSharedSpaces is enabled");
+
+  // If using shared space, open the file that contains the shared space
+  // and map in the memory before initializing the rest of metaspace (so
+  // the addresses don't conflict)
+  address cds_address = NULL;
+  FileMapInfo* mapinfo = new FileMapInfo();
+
+  // Open the shared archive file, read and validate the header. If
+  // initialization fails, shared spaces [UseSharedSpaces] are
+  // disabled and the file is closed.
+  // Map in spaces now also
+  if (mapinfo->initialize() && map_shared_spaces(mapinfo)) {
+    size_t cds_total = core_spaces_size();
+    cds_address = (address)mapinfo->header()->region_addr(0);
+#ifdef _LP64
+    if (Metaspace::using_class_space()) {
+      char* cds_end = (char*)(cds_address + cds_total);
+      cds_end = (char *)align_up(cds_end, Metaspace::reserve_alignment());
+      // If UseCompressedClassPointers is set then allocate the metaspace area
+      // above the heap and above the CDS area (if it exists).
+      Metaspace::allocate_metaspace_compressed_klass_ptrs(cds_end, cds_address);
+      // map_heap_regions() compares the current narrow oop and klass encodings
+      // with the archived ones, so it must be done after all encodings are determined.
+      mapinfo->map_heap_regions();
+    }
+#endif // _LP64
+  } else {
+    assert(!mapinfo->is_open() && !UseSharedSpaces,
+           "archive file not closed or shared spaces not disabled.");
+  }
+}
+
+void MetaspaceShared::initialize_dumptime_shared_and_meta_spaces() {
+  assert(DumpSharedSpaces, "should be called for dump time only");
   const size_t reserve_alignment = Metaspace::reserve_alignment();
   bool large_pages = false; // No large pages when dumping the CDS archive.
   char* shared_base = (char*)align_up((char*)SharedBaseAddress, reserve_alignment);
@@ -223,12 +258,12 @@
   // On 64-bit VM, the heap and class space layout will be the same as if
   // you're running in -Xshare:on mode:
   //
-  //                         +-- SharedBaseAddress (default = 0x800000000)
-  //                         v
-  // +-..---------+----+ ... +----+----+----+----+----+---------------+
-  // |    Heap    | ST |     | MC | RW | RO | MD | OD | class space   |
-  // +-..---------+----+ ... +----+----+----+----+----+---------------+
-  // |<--MaxHeapSize->|     |<-- UnscaledClassSpaceMax = 4GB ------->|
+  //                              +-- SharedBaseAddress (default = 0x800000000)
+  //                              v
+  // +-..---------+---------+ ... +----+----+----+----+----+---------------+
+  // |    Heap    | Archive |     | MC | RW | RO | MD | OD | class space   |
+  // +-..---------+---------+ ... +----+----+----+----+----+---------------+
+  // |<--   MaxHeapSize  -->|     |<-- UnscaledClassSpaceMax = 4GB ------->|
   //
   const uint64_t UnscaledClassSpaceMax = (uint64_t(max_juint) + 1);
   const size_t cds_total = align_down(UnscaledClassSpaceMax, reserve_alignment);
@@ -268,12 +303,9 @@
 
   // Set up compress class pointers.
   Universe::set_narrow_klass_base((address)_shared_rs.base());
-  if (UseAOT || cds_total > UnscaledClassSpaceMax) {
-    // AOT forces narrow_klass_shift=LogKlassAlignmentInBytes
-    Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
-  } else {
-    Universe::set_narrow_klass_shift(0);
-  }
+  // Set narrow_klass_shift to be LogKlassAlignmentInBytes. This is consistent
+  // with AOT.
+  Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
 
   Metaspace::initialize_class_space(tmp_class_space);
   tty->print_cr("narrow_klass_base = " PTR_FORMAT ", narrow_klass_shift = %d",
@@ -1405,7 +1437,7 @@
   print_heap_region_stats(_string_regions, "st", total_reserved);
   print_heap_region_stats(_open_archive_heap_regions, "oa", total_reserved);
 
-  tty->print_cr("total   : " SIZE_FORMAT_W(9) " [100.0%% of total] out of " SIZE_FORMAT_W(9) " bytes [%5.1f%% used]",
+  tty->print_cr("total    : " SIZE_FORMAT_W(9) " [100.0%% of total] out of " SIZE_FORMAT_W(9) " bytes [%5.1f%% used]",
                  total_bytes, total_reserved, total_u_perc);
 }
 
@@ -1416,7 +1448,7 @@
       char* start = (char*)heap_mem->at(i).start();
       size_t size = heap_mem->at(i).byte_size();
       char* top = start + size;
-      tty->print_cr("%s%d space: " SIZE_FORMAT_W(9) " [ %4.1f%% of total] out of " SIZE_FORMAT_W(9) " bytes [100%% used] at " INTPTR_FORMAT,
+      tty->print_cr("%s%d space: " SIZE_FORMAT_W(9) " [ %4.1f%% of total] out of " SIZE_FORMAT_W(9) " bytes [100.0%% used] at " INTPTR_FORMAT,
                     name, i, size, size/double(total_size)*100.0, size, p2i(start));
 
   }
--- a/src/hotspot/share/memory/metaspaceShared.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/metaspaceShared.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -146,7 +146,8 @@
   static size_t core_spaces_size() {
     return _core_spaces_size;
   }
-  static void initialize_shared_rs() NOT_CDS_RETURN;
+  static void initialize_dumptime_shared_and_meta_spaces() NOT_CDS_RETURN;
+  static void initialize_runtime_shared_and_meta_spaces() NOT_CDS_RETURN;
 
   // Delta of this object from the bottom of the archive.
   static uintx object_delta(void* obj) {
--- a/src/hotspot/share/memory/resourceArea.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/resourceArea.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,15 @@
 #include "memory/resourceArea.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/thread.inline.hpp"
+#include "services/memTracker.hpp"
+
+void ResourceArea::bias_to(MEMFLAGS new_flags) {
+  if (new_flags != _flags) {
+    MemTracker::record_arena_free(_flags);
+    MemTracker::record_new_arena(new_flags);
+    _flags = new_flags;
+  }
+}
 
 //------------------------------ResourceMark-----------------------------------
 debug_only(int ResourceArea::_warned;)      // to suppress multiple warnings
--- a/src/hotspot/share/memory/resourceArea.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/resourceArea.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,11 +49,11 @@
   debug_only(static int _warned;)       // to suppress multiple warnings
 
 public:
-  ResourceArea() : Arena(mtThread) {
+  ResourceArea(MEMFLAGS flags = mtThread) : Arena(flags) {
     debug_only(_nesting = 0;)
   }
 
-  ResourceArea(size_t init_size) : Arena(mtThread, init_size) {
+  ResourceArea(size_t init_size, MEMFLAGS flags = mtThread) : Arena(flags, init_size) {
     debug_only(_nesting = 0;);
   }
 
@@ -70,7 +70,11 @@
     return (char*)Amalloc(size, alloc_failmode);
   }
 
-  debug_only(int nesting() const { return _nesting; });
+  // Bias this resource area to specific memory type
+  // (by default, ResourceArea is tagged as mtThread, per-thread general purpose storage)
+  void bias_to(MEMFLAGS flags);
+
+  debug_only(int nesting() const { return _nesting; })
 };
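
ResourceArea::bias_to, added above, retags a thread's resource area for native memory tracking by unregistering the arena under its old MEMFLAGS category and re-registering it under the new one, without reallocating anything. A standalone sketch of that bookkeeping; MemTracker and the MEMFLAGS categories are HotSpot-internal, and the category names used below are placeholders only:

#include <cstdio>
#include <map>
#include <string>

// Toy NMT-style accounting: per-category arena counts that a rebias moves
// between, mirroring record_arena_free(old) followed by record_new_arena(new).
static std::map<std::string, int> arena_counts;

struct FakeResourceArea {
  std::string flags = "mtThread";        // default tag, like ResourceArea()
  FakeResourceArea() { arena_counts[flags]++; }
  void bias_to(const std::string& new_flags) {
    if (new_flags != flags) {
      arena_counts[flags]--;             // record_arena_free(_flags)
      arena_counts[new_flags]++;         // record_new_arena(new_flags)
      flags = new_flags;
    }
  }
};

int main() {
  FakeResourceArea area;
  area.bias_to("mtOther");               // hypothetical target category
  printf("mtThread=%d mtOther=%d\n", arena_counts["mtThread"], arena_counts["mtOther"]);
  return 0;
}
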
 
 
--- a/src/hotspot/share/memory/universe.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/memory/universe.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1064,44 +1064,40 @@
 
   Universe::_vm_exception = InstanceKlass::cast(k)->allocate_instance(CHECK_false);
 
-  if (!DumpSharedSpaces) {
-    // These are the only Java fields that are currently set during shared space dumping.
-    // We prefer to not handle this generally, so we always reinitialize these detail messages.
-    Handle msg = java_lang_String::create_from_str("Java heap space", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_java_heap, msg());
+  Handle msg = java_lang_String::create_from_str("Java heap space", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_java_heap, msg());
+
+  msg = java_lang_String::create_from_str("Metaspace", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_metaspace, msg());
+  msg = java_lang_String::create_from_str("Compressed class space", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_class_metaspace, msg());
 
-    msg = java_lang_String::create_from_str("Metaspace", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_metaspace, msg());
-    msg = java_lang_String::create_from_str("Compressed class space", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_class_metaspace, msg());
+  msg = java_lang_String::create_from_str("Requested array size exceeds VM limit", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_array_size, msg());
 
-    msg = java_lang_String::create_from_str("Requested array size exceeds VM limit", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_array_size, msg());
+  msg = java_lang_String::create_from_str("GC overhead limit exceeded", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_gc_overhead_limit, msg());
 
-    msg = java_lang_String::create_from_str("GC overhead limit exceeded", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_gc_overhead_limit, msg());
+  msg = java_lang_String::create_from_str("Java heap space: failed reallocation of scalar replaced objects", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_realloc_objects, msg());
 
-    msg = java_lang_String::create_from_str("Java heap space: failed reallocation of scalar replaced objects", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_realloc_objects, msg());
+  msg = java_lang_String::create_from_str("/ by zero", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_arithmetic_exception_instance, msg());
 
-    msg = java_lang_String::create_from_str("/ by zero", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_arithmetic_exception_instance, msg());
-
-    // Setup the array of errors that have preallocated backtrace
-    k = Universe::_out_of_memory_error_java_heap->klass();
-    assert(k->name() == vmSymbols::java_lang_OutOfMemoryError(), "should be out of memory error");
-    ik = InstanceKlass::cast(k);
+  // Setup the array of errors that have preallocated backtrace
+  k = Universe::_out_of_memory_error_java_heap->klass();
+  assert(k->name() == vmSymbols::java_lang_OutOfMemoryError(), "should be out of memory error");
+  ik = InstanceKlass::cast(k);
 
-    int len = (StackTraceInThrowable) ? (int)PreallocatedOutOfMemoryErrorCount : 0;
-    Universe::_preallocated_out_of_memory_error_array = oopFactory::new_objArray(ik, len, CHECK_false);
-    for (int i=0; i<len; i++) {
-      oop err = ik->allocate_instance(CHECK_false);
-      Handle err_h = Handle(THREAD, err);
-      java_lang_Throwable::allocate_backtrace(err_h, CHECK_false);
-      Universe::preallocated_out_of_memory_errors()->obj_at_put(i, err_h());
-    }
-    Universe::_preallocated_out_of_memory_error_avail_count = (jint)len;
+  int len = (StackTraceInThrowable) ? (int)PreallocatedOutOfMemoryErrorCount : 0;
+  Universe::_preallocated_out_of_memory_error_array = oopFactory::new_objArray(ik, len, CHECK_false);
+  for (int i=0; i<len; i++) {
+    oop err = ik->allocate_instance(CHECK_false);
+    Handle err_h = Handle(THREAD, err);
+    java_lang_Throwable::allocate_backtrace(err_h, CHECK_false);
+    Universe::preallocated_out_of_memory_errors()->obj_at_put(i, err_h());
   }
+  Universe::_preallocated_out_of_memory_error_avail_count = (jint)len;
 
   Universe::initialize_known_methods(CHECK_false);
 
--- a/src/hotspot/share/metaprogramming/primitiveConversions.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/metaprogramming/primitiveConversions.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -167,4 +167,24 @@
   return Cast<T, U>()(x);
 }
 
+// jfloat and jdouble translation to integral types
+
+template<>
+struct PrimitiveConversions::Translate<jdouble> : public TrueType {
+  typedef double Value;
+  typedef int64_t Decayed;
+
+  static Decayed decay(Value x) { return PrimitiveConversions::cast<Decayed>(x); }
+  static Value recover(Decayed x) { return PrimitiveConversions::cast<Value>(x); }
+};
+
+template<>
+struct PrimitiveConversions::Translate<jfloat> : public TrueType {
+  typedef float Value;
+  typedef int32_t Decayed;
+
+  static Decayed decay(Value x) { return PrimitiveConversions::cast<Decayed>(x); }
+  static Value recover(Decayed x) { return PrimitiveConversions::cast<Value>(x); }
+};
+
 #endif // SHARE_VM_METAPROGRAMMING_PRIMITIVECONVERSIONS_HPP
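
The Translate specializations above let the templated Atomic operations (see the atomic.hpp changes later in this patch) handle jfloat and jdouble by reinterpreting their bit patterns as same-sized integers rather than converting numerically. A standalone sketch of that decay/recover round trip, using memcpy in place of the HotSpot-internal PrimitiveConversions::cast:

#include <cassert>
#include <cstdint>
#include <cstring>

// Bit-preserving casts in the spirit of decay()/recover() above.
static int64_t decay(double v)    { int64_t bits; std::memcpy(&bits, &v, sizeof bits); return bits; }
static double  recover(int64_t b) { double v;     std::memcpy(&v, &b, sizeof v);       return v;    }

int main() {
  double x = 3.5;
  assert(recover(decay(x)) == x);  // the exact bit pattern survives the round trip
}

memcpy is the portable way to express the bit cast here; the numeric comparison in the assert works because 3.5 is not a NaN, while the bit pattern itself is preserved in every case.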
--- a/src/hotspot/share/oops/constantPool.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/constantPool.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -135,6 +135,16 @@
   return (objArrayOop)_cache->resolved_references();
 }
 
+// Called from outside constant pool resolution where a resolved_reference array
+// may not be present.
+objArrayOop ConstantPool::resolved_references_or_null() const {
+  if (_cache == NULL) {
+    return NULL;
+  } else {
+    return (objArrayOop)_cache->resolved_references();
+  }
+}
+
 // Create resolved_references array and mapping array for original cp indexes
 // The ldc bytecode was rewritten to have the resolved reference array index so need a way
 // to map it back for resolving and some unlikely miscellaneous uses.
@@ -284,6 +294,28 @@
     set_resolved_references(NULL);
   }
 }
+
+void ConstantPool::resolve_class_constants(TRAPS) {
+  assert(DumpSharedSpaces, "used during dump time only");
+  // The _cache may be NULL if the _pool_holder klass fails verification
+  // at dump time due to missing dependencies.
+  if (cache() == NULL || reference_map() == NULL) {
+    return; // nothing to do
+  }
+
+  constantPoolHandle cp(THREAD, this);
+  for (int index = 1; index < length(); index++) { // Index 0 is unused
+    if (tag_at(index).is_string()) {
+      Symbol* sym = cp->unresolved_string_at(index);
+      // Look up only. Only resolve references to already interned strings.
+      oop str = StringTable::lookup(sym);
+      if (str != NULL) {
+        int cache_index = cp->cp_to_object_index(index);
+        cp->string_at_put(index, cache_index, str);
+      }
+    }
+  }
+}
 #endif
 
 // CDS support. Create a new resolved_references array.
@@ -712,22 +744,6 @@
   }
 }
 
-bool ConstantPool::resolve_class_constants(TRAPS) {
-  constantPoolHandle cp(THREAD, this);
-  for (int index = 1; index < length(); index++) { // Index 0 is unused
-    if (tag_at(index).is_string()) {
-      Symbol* sym = cp->unresolved_string_at(index);
-      // Look up only. Only resolve references to already interned strings.
-      oop str = StringTable::lookup(sym);
-      if (str != NULL) {
-        int cache_index = cp->cp_to_object_index(index);
-        cp->string_at_put(index, cache_index, str);
-      }
-    }
-  }
-  return true;
-}
-
 Symbol* ConstantPool::exception_message(const constantPoolHandle& this_cp, int which, constantTag tag, oop pending_exception) {
   // Dig out the detailed message to reuse if possible
   Symbol* message = java_lang_Throwable::detail_message(pending_exception);
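
As the comments above note, resolve_class_constants() is now used only at CDS dump time and resolves a string entry only when the string is already interned, so dumping never creates new heap objects as a side effect. A rough standalone sketch of that lookup-only policy; ConstantEntry and the intern table here are invented stand-ins for the constant pool entry and StringTable:

#include <string>
#include <unordered_set>
#include <vector>

struct ConstantEntry {              // invented stand-in for a CONSTANT_String entry
  bool is_string;
  std::string symbol;               // the unresolved symbol text
  const std::string* resolved;      // stays null unless the symbol is already interned
};

// Resolve only entries whose text is already present in the intern table;
// entries that would require creating a new string are left unresolved.
void resolve_strings(std::vector<ConstantEntry>& pool,
                     const std::unordered_set<std::string>& intern_table) {
  for (ConstantEntry& e : pool) {
    if (!e.is_string) continue;
    auto it = intern_table.find(e.symbol);   // lookup only, never insert
    if (it != intern_table.end()) {
      e.resolved = &*it;
    }
  }
}

int main() {
  std::unordered_set<std::string> interned{"hello"};
  std::vector<ConstantEntry> pool{{true, "hello"}, {true, "world"}};
  resolve_strings(pool, interned);
  return (pool[0].resolved != nullptr && pool[1].resolved == nullptr) ? 0 : 1;
}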
--- a/src/hotspot/share/oops/constantPool.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/constantPool.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -226,6 +226,7 @@
 
   // resolved strings, methodHandles and callsite objects from the constant pool
   objArrayOop resolved_references()  const;
+  objArrayOop resolved_references_or_null()  const;
   // mapping resolved object array indexes to cp indexes and back.
   int object_to_cp_index(int index)         { return reference_map()->at(index); }
   int cp_to_object_index(int index);
@@ -716,9 +717,9 @@
 
   // CDS support
   void archive_resolved_references(Thread *THREAD) NOT_CDS_JAVA_HEAP_RETURN;
+  void resolve_class_constants(TRAPS) NOT_CDS_JAVA_HEAP_RETURN;
   void remove_unshareable_info();
   void restore_unshareable_info(TRAPS);
-  bool resolve_class_constants(TRAPS);
   // The ConstantPool vtable is restored by this call when the ConstantPool is
   // in the shared archive.  See patch_klass_vtables() in metaspaceShared.cpp for
   // all the gory details.  SA, dtrace and pstack helpers distinguish metadata
--- a/src/hotspot/share/oops/instanceKlass.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/instanceKlass.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -285,6 +285,9 @@
     java_lang_Class::set_klass(java_mirror(), NULL);
   }
 
+  // Also remove mirror from handles
+  loader_data->remove_handle(_java_mirror);
+
   // Need to take this class off the class loader data list.
   loader_data->remove_class(this);
 
--- a/src/hotspot/share/oops/klass.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/klass.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -43,9 +43,16 @@
 #include "trace/traceMacros.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/stack.inline.hpp"
-#if INCLUDE_ALL_GCS
-#include "gc/g1/g1SATBCardTableModRefBS.hpp"
-#endif // INCLUDE_ALL_GCS
+
+void Klass::set_java_mirror(Handle m) {
+  assert(!m.is_null(), "New mirror should never be null.");
+  assert(_java_mirror.resolve() == NULL, "should only be used to initialize mirror");
+  _java_mirror = class_loader_data()->add_handle(m);
+}
+
+oop Klass::java_mirror() const {
+  return _java_mirror.resolve();
+}
 
 bool Klass::is_cloneable() const {
   return _access_flags.is_cloneable_fast() ||
@@ -441,51 +448,6 @@
   }
 }
 
-void Klass::klass_update_barrier_set(oop v) {
-  record_modified_oops();
-}
-
-// This barrier is used by G1 to remember the old oop values, so
-// that we don't forget any objects that were live at the snapshot at
-// the beginning. This function is only used when we write oops into Klasses.
-void Klass::klass_update_barrier_set_pre(oop* p, oop v) {
-#if INCLUDE_ALL_GCS
-  if (UseG1GC) {
-    oop obj = *p;
-    if (obj != NULL) {
-      G1SATBCardTableModRefBS::enqueue(obj);
-    }
-  }
-#endif
-}
-
-void Klass::klass_oop_store(oop* p, oop v) {
-  assert(!Universe::heap()->is_in_reserved((void*)p), "Should store pointer into metadata");
-  assert(v == NULL || Universe::heap()->is_in_reserved((void*)v), "Should store pointer to an object");
-
-  // do the store
-  if (always_do_update_barrier) {
-    klass_oop_store((volatile oop*)p, v);
-  } else {
-    klass_update_barrier_set_pre(p, v);
-    *p = v;
-    klass_update_barrier_set(v);
-  }
-}
-
-void Klass::klass_oop_store(volatile oop* p, oop v) {
-  assert(!Universe::heap()->is_in_reserved((void*)p), "Should store pointer into metadata");
-  assert(v == NULL || Universe::heap()->is_in_reserved((void*)v), "Should store pointer to an object");
-
-  klass_update_barrier_set_pre((oop*)p, v); // Cast away volatile.
-  OrderAccess::release_store_ptr(p, v);
-  klass_update_barrier_set(v);
-}
-
-void Klass::oops_do(OopClosure* cl) {
-  cl->do_oop(&_java_mirror);
-}
-
 void Klass::metaspace_pointers_do(MetaspaceClosure* it) {
   if (log_is_enabled(Trace, cds)) {
     ResourceMark rm;
@@ -532,7 +494,8 @@
     ResourceMark rm;
     log_trace(cds, unshareable)("remove java_mirror: %s", external_name());
   }
-  set_java_mirror(NULL);
+  // Just null out the mirror.  The class_loader_data() no longer exists.
+  _java_mirror = NULL;
 }
 
 void Klass::restore_unshareable_info(ClassLoaderData* loader_data, Handle protection_domain, TRAPS) {
--- a/src/hotspot/share/oops/klass.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/klass.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -30,6 +30,7 @@
 #include "memory/memRegion.hpp"
 #include "oops/metadata.hpp"
 #include "oops/oop.hpp"
+#include "oops/oopHandle.hpp"
 #include "trace/traceMacros.hpp"
 #include "utilities/accessFlags.hpp"
 #include "utilities/macros.hpp"
@@ -119,7 +120,7 @@
   // Ordered list of all primary supertypes
   Klass*      _primary_supers[_primary_super_limit];
   // java/lang/Class instance mirroring this class
-  oop       _java_mirror;
+  OopHandle _java_mirror;
   // Superclass
   Klass*      _super;
   // First subclass (NULL if none); _subklass->next_sibling() is next one
@@ -148,10 +149,6 @@
   // vtable length
   int _vtable_len;
 
-  // Remembered sets support for the oops in the klasses.
-  jbyte _modified_oops;             // Card Table Equivalent (YC/CMS support)
-  jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
-
 private:
   // This is an index into FileMapHeader::_classpath_entry_table[], to
   // associate this class with the JAR file where it's loaded from during
@@ -228,13 +225,15 @@
     }
   }
 
-  // store an oop into a field of a Klass
-  void klass_oop_store(oop* p, oop v);
-  void klass_oop_store(volatile oop* p, oop v);
+  // java mirror
+  oop java_mirror() const;
+  void set_java_mirror(Handle m);
 
-  // java mirror
-  oop java_mirror() const              { return _java_mirror; }
-  void set_java_mirror(oop m) { klass_oop_store(&_java_mirror, m); }
+  // Temporary mirror switch used by RedefineClasses
+  // Both mirrors are on the ClassLoaderData::_handles list already so no
+  // barriers are needed.
+  void set_java_mirror_handle(OopHandle mirror) { _java_mirror = mirror; }
+  OopHandle java_mirror_handle() const          { return _java_mirror; }
 
   // modifier flags
   jint modifier_flags() const          { return _modifier_flags; }
@@ -260,17 +259,6 @@
   ClassLoaderData* class_loader_data() const               { return _class_loader_data; }
   void set_class_loader_data(ClassLoaderData* loader_data) {  _class_loader_data = loader_data; }
 
-  // The Klasses are not placed in the Heap, so the Card Table or
-  // the Mod Union Table can't be used to mark when klasses have modified oops.
-  // The CT and MUT bits saves this information for the individual Klasses.
-  void record_modified_oops()            { _modified_oops = 1; }
-  void clear_modified_oops()             { _modified_oops = 0; }
-  bool has_modified_oops()               { return _modified_oops == 1; }
-
-  void accumulate_modified_oops()        { if (has_modified_oops()) _accumulated_modified_oops = 1; }
-  void clear_accumulated_modified_oops() { _accumulated_modified_oops = 0; }
-  bool has_accumulated_modified_oops()   { return _accumulated_modified_oops == 1; }
-
   int shared_classpath_index() const   {
     return _shared_class_path_index;
   };
@@ -598,9 +586,6 @@
 
   TRACE_DEFINE_TRACE_ID_METHODS;
 
-  // garbage collection support
-  void oops_do(OopClosure* cl);
-
   virtual void metaspace_pointers_do(MetaspaceClosure* iter);
   virtual MetaspaceObj::Type type() const { return ClassType; }
 
@@ -687,11 +672,6 @@
 
   static Klass* decode_klass_not_null(narrowKlass v);
   static Klass* decode_klass(narrowKlass v);
-
- private:
-  // barriers used by klass_oop_store
-  void klass_update_barrier_set(oop v);
-  void klass_update_barrier_set_pre(oop* p, oop v);
 };
 
 // Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions.
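
With _java_mirror now an OopHandle instead of a raw oop, the mirror oop lives in a handle slot owned by the ClassLoaderData, set_java_mirror() registers it there via add_handle(), and java_mirror() pays one extra dereference to resolve it. That indirection is also why the C2 changes further down (library_call.cpp, memnode.cpp, subnode.cpp) model a mirror read as two loads. A simplified sketch of the shape, with toy Oop, OopHandleSketch and LoaderDataSketch types rather than the real VM classes:

#include <deque>
#include <string>

struct Oop { std::string name; };           // toy stand-in for a Java heap object

// Toy OopHandle: remembers the address of a root slot, not the oop itself,
// so whoever owns the slot storage can update it behind the handle.
struct OopHandleSketch {
  Oop** slot;
  OopHandleSketch(Oop** s = nullptr) : slot(s) {}
  Oop* resolve() const { return slot != nullptr ? *slot : nullptr; }
};

// Toy ClassLoaderData: owns the root slots its classes' handles point into.
struct LoaderDataSketch {
  std::deque<Oop*> roots;                   // deque: element addresses stay stable
  OopHandleSketch add_handle(Oop* o) {
    roots.push_back(o);
    return OopHandleSketch(&roots.back());
  }
};

struct KlassSketch {
  OopHandleSketch _java_mirror;             // was: a raw Oop* field
  Oop* java_mirror() const { return _java_mirror.resolve(); }  // one extra load
};

int main() {
  LoaderDataSketch cld;
  Oop mirror{"java.lang.Class instance"};
  KlassSketch k;
  k._java_mirror = cld.add_handle(&mirror); // analogue of set_java_mirror(Handle)
  return k.java_mirror() == &mirror ? 0 : 1;
}

Keeping the slot inside the loader data is what lets the mirrors be scanned as ordinary ClassLoaderData roots, which matches the removal of the klass_oop_store() barrier machinery in klass.cpp above.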
--- a/src/hotspot/share/oops/klassVtable.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/klassVtable.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -479,13 +479,15 @@
           allocate_new = false;
         }
 
-        if (checkconstraints) {
-        // Override vtable entry if passes loader constraint check
-        // if loader constraint checking requested
-        // No need to visit his super, since he and his super
-        // have already made any needed loader constraints.
-        // Since loader constraints are transitive, it is enough
-        // to link to the first super, and we get all the others.
+        // Do not check loader constraints for overpass methods because overpass
+        // methods are created by the jvm to throw exceptions.
+        if (checkconstraints && !target_method()->is_overpass()) {
+          // Override vtable entry if passes loader constraint check
+          // if loader constraint checking requested
+          // No need to visit his super, since he and his super
+          // have already made any needed loader constraints.
+          // Since loader constraints are transitive, it is enough
+          // to link to the first super, and we get all the others.
           Handle super_loader(THREAD, super_klass->class_loader());
 
           if (target_loader() != super_loader()) {
@@ -495,21 +497,23 @@
                                                         super_loader, true,
                                                         CHECK_(false));
             if (failed_type_symbol != NULL) {
-              const char* msg = "loader constraint violation: when resolving "
-                "overridden method \"%s\" the class loader (instance"
-                " of %s) of the current class, %s, and its superclass loader "
-                "(instance of %s), have different Class objects for the type "
-                "%s used in the signature";
+              const char* msg = "loader constraint violation for class %s: when selecting "
+                "overriding method \"%s\" the class loader (instance of %s) of the "
+                "selected method's type %s, and the class loader (instance of %s) for its super "
+                "type %s have different Class objects for the type %s used in the signature";
+              char* curr_class = klass->name()->as_C_string();
               char* sig = target_method()->name_and_sig_as_C_string();
               const char* loader1 = SystemDictionary::loader_name(target_loader());
-              char* current = target_klass->name()->as_C_string();
+              char* sel_class = target_klass->name()->as_C_string();
               const char* loader2 = SystemDictionary::loader_name(super_loader());
+              char* super_class = super_klass->name()->as_C_string();
               char* failed_type_name = failed_type_symbol->as_C_string();
-              size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-                strlen(current) + strlen(loader2) + strlen(failed_type_name);
+              size_t buflen = strlen(msg) + strlen(curr_class) + strlen(sig) +
+                strlen(loader1) + strlen(sel_class) + strlen(loader2) +
+                strlen(super_class) + strlen(failed_type_name);
               char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-              jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                           failed_type_name);
+              jio_snprintf(buf, buflen, msg, curr_class, sig, loader1, sel_class, loader2,
+                           super_class, failed_type_name);
               THROW_MSG_(vmSymbols::java_lang_LinkageError(), buf, false);
             }
           }
@@ -1193,13 +1197,15 @@
       // to correctly enforce loader constraints for interface method inheritance
       target = LinkResolver::lookup_instance_method_in_klasses(_klass, m->name(), m->signature(), CHECK);
     }
-    if (target == NULL || !target->is_public() || target->is_abstract()) {
-      // Entry does not resolve. Leave it empty for AbstractMethodError.
-        if (!(target == NULL) && !target->is_public()) {
-          // Stuff an IllegalAccessError throwing method in there instead.
-          itableOffsetEntry::method_entry(_klass, method_table_offset)[m->itable_index()].
-              initialize(Universe::throw_illegal_access_error());
-        }
+    if (target == NULL || !target->is_public() || target->is_abstract() || target->is_overpass()) {
+      assert(target == NULL || !target->is_overpass() || target->is_public(),
+             "Non-public overpass method!");
+      // Entry does not resolve. Leave it empty for AbstractMethodError or other error.
+      if (!(target == NULL) && !target->is_public()) {
+        // Stuff an IllegalAccessError throwing method in there instead.
+        itableOffsetEntry::method_entry(_klass, method_table_offset)[m->itable_index()].
+            initialize(Universe::throw_illegal_access_error());
+      }
     } else {
       // Entry did resolve, check loader constraints before initializing
       // if checkconstraints requested
@@ -1213,24 +1219,24 @@
                                                       interface_loader,
                                                       true, CHECK);
           if (failed_type_symbol != NULL) {
-            const char* msg = "loader constraint violation in interface "
-              "itable initialization: when resolving method \"%s\" the class"
-              " loader (instance of %s) of the current class, %s, "
-              "and the class loader (instance of %s) for interface "
-              "%s have different Class objects for the type %s "
-              "used in the signature";
-            char* sig = target()->name_and_sig_as_C_string();
-            const char* loader1 = SystemDictionary::loader_name(method_holder_loader());
+            const char* msg = "loader constraint violation in interface itable"
+              " initialization for class %s: when selecting method \"%s\" the"
+              " class loader (instance of %s) for super interface %s, and the class"
+              " loader (instance of %s) of the selected method's type, %s have"
+              " different Class objects for the type %s used in the signature";
             char* current = _klass->name()->as_C_string();
-            const char* loader2 = SystemDictionary::loader_name(interface_loader());
+            char* sig = m->name_and_sig_as_C_string();
+            const char* loader1 = SystemDictionary::loader_name(interface_loader());
             char* iface = InstanceKlass::cast(interf)->name()->as_C_string();
+            const char* loader2 = SystemDictionary::loader_name(method_holder_loader());
+            char* mclass = target()->method_holder()->name()->as_C_string();
             char* failed_type_name = failed_type_symbol->as_C_string();
-            size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-              strlen(current) + strlen(loader2) + strlen(iface) +
+            size_t buflen = strlen(msg) + strlen(current) + strlen(sig) +
+              strlen(loader1) + strlen(iface) + strlen(loader2) + strlen(mclass) +
               strlen(failed_type_name);
             char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-            jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                         iface, failed_type_name);
+            jio_snprintf(buf, buflen, msg, current, sig, loader1, iface,
+                         loader2, mclass, failed_type_name);
             THROW_MSG(vmSymbols::java_lang_LinkageError(), buf);
           }
         }
--- a/src/hotspot/share/oops/oop.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/oop.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -501,7 +501,7 @@
 void oopDesc::release_char_field_put(int offset, jchar contents)      { OrderAccess::release_store(char_field_addr(offset), contents); }
 
 jboolean oopDesc::bool_field_acquire(int offset) const                { return OrderAccess::load_acquire(bool_field_addr(offset));     }
-void oopDesc::release_bool_field_put(int offset, jboolean contents)   { OrderAccess::release_store(bool_field_addr(offset), (contents & 1)); }
+void oopDesc::release_bool_field_put(int offset, jboolean contents)   { OrderAccess::release_store(bool_field_addr(offset), jboolean(contents & 1)); }
 
 jint oopDesc::int_field_acquire(int offset) const                     { return OrderAccess::load_acquire(int_field_addr(offset));      }
 void oopDesc::release_int_field_put(int offset, jint contents)        { OrderAccess::release_store(int_field_addr(offset), contents);  }
--- a/src/hotspot/share/oops/oopsHierarchy.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/oops/oopsHierarchy.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -177,6 +177,15 @@
             (void)const_cast<oop&>(oop::operator=(o));                     \
             return *this;                                                  \
        }                                                                   \
+   };                                                                      \
+                                                                           \
+   template<>                                                              \
+   struct PrimitiveConversions::Translate<type##Oop> : public TrueType {   \
+     typedef type##Oop Value;                                              \
+     typedef type##OopDesc* Decayed;                                       \
+                                                                           \
+     static Decayed decay(Value x) { return (type##OopDesc*)x.obj(); }     \
+     static Value recover(Decayed x) { return type##Oop(x); }              \
    };
 
 DEF_OOP(instance);
--- a/src/hotspot/share/opto/bytecodeInfo.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/bytecodeInfo.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -644,7 +644,8 @@
       C->log()->elem("inline_level_discount caller='%d' callee='%d'", id1, id2);
     }
   }
-  InlineTree* ilt = new InlineTree(C, this, callee_method, caller_jvms, caller_bci, recur_frequency, _max_inline_level + max_inline_level_adjust);
+  // Allocate in the comp_arena to make sure the InlineTree is live when dumping a replay compilation file
+  InlineTree* ilt = new (C->comp_arena()) InlineTree(C, this, callee_method, caller_jvms, caller_bci, recur_frequency, _max_inline_level + max_inline_level_adjust);
   _subtrees.append(ilt);
 
   NOT_PRODUCT( _count_inlines += 1; )
--- a/src/hotspot/share/opto/c2compiler.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/c2compiler.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -410,6 +410,9 @@
   case vmIntrinsics::_multiplyExactL:
     if (!Matcher::match_rule_supported(Op_OverflowMulL)) return false;
     break;
+  case vmIntrinsics::_multiplyHigh:
+    if (!Matcher::match_rule_supported(Op_MulHiL)) return false;
+    break;
   case vmIntrinsics::_getCallerClass:
     if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false;
     break;
--- a/src/hotspot/share/opto/chaitin.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/chaitin.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -348,8 +348,8 @@
   _alternate = 0;
   _matcher._allocation_started = true;
 
-  ResourceArea split_arena;     // Arena for Split local resources
-  ResourceArea live_arena;      // Arena for liveness & IFG info
+  ResourceArea split_arena(mtCompiler);     // Arena for Split local resources
+  ResourceArea live_arena(mtCompiler);      // Arena for liveness & IFG info
   ResourceMark rm(&live_arena);
 
   // Need live-ness for the IFG; need the IFG for coalescing.  If the
--- a/src/hotspot/share/opto/gcm.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/gcm.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1424,7 +1424,7 @@
   // Enabling the scheduler for register pressure plus finding blocks of size to schedule for it
   // is key to enabling this feature.
   PhaseChaitin regalloc(C->unique(), *this, _matcher, true);
-  ResourceArea live_arena;      // Arena for liveness
+  ResourceArea live_arena(mtCompiler);      // Arena for liveness
   ResourceMark rm_live(&live_arena);
   PhaseLive live(*this, regalloc._lrg_map.names(), &live_arena, true);
   PhaseIFG ifg(&live_arena);
--- a/src/hotspot/share/opto/library_call.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/library_call.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -231,6 +231,7 @@
   bool inline_math_addExactL(bool is_increment);
   bool inline_math_multiplyExactI();
   bool inline_math_multiplyExactL();
+  bool inline_math_multiplyHigh();
   bool inline_math_negateExactI();
   bool inline_math_negateExactL();
   bool inline_math_subtractExactI(bool is_decrement);
@@ -549,6 +550,7 @@
   case vmIntrinsics::_incrementExactL:          return inline_math_addExactL(true /* increment */);
   case vmIntrinsics::_multiplyExactI:           return inline_math_multiplyExactI();
   case vmIntrinsics::_multiplyExactL:           return inline_math_multiplyExactL();
+  case vmIntrinsics::_multiplyHigh:             return inline_math_multiplyHigh();
   case vmIntrinsics::_negateExactI:             return inline_math_negateExactI();
   case vmIntrinsics::_negateExactL:             return inline_math_negateExactL();
   case vmIntrinsics::_subtractExactI:           return inline_math_subtractExactI(false /* subtract */);
@@ -1897,6 +1899,11 @@
   return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2));
 }
 
+bool LibraryCallKit::inline_math_multiplyHigh() {
+  set_result(_gvn.transform(new MulHiLNode(argument(0), argument(2))));
+  return true;
+}
+
 Node*
 LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
   // These are the candidate return value:
@@ -3453,7 +3460,8 @@
 // Given a klass oop, load its java mirror (a java.lang.Class oop).
 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
   Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
-  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT, MemNode::unordered);
+  Node* load = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
+  return make_load(NULL, load, TypeInstPtr::MIRROR, T_OBJECT, MemNode::unordered);
 }
 
 //-----------------------load_klass_from_mirror_common-------------------------
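
The new _multiplyHigh intrinsic maps Math.multiplyHigh(long, long) onto the existing MulHiL node when the matcher supports it; the value produced is the upper 64 bits of the full 128-bit signed product. A portable sketch of those semantics (not the JDK's or HotSpot's own code), built from unsigned 32-bit halves plus the usual sign correction, just to make the computed value concrete:

#include <cassert>
#include <cstdint>

// Upper 64 bits of the full 128-bit unsigned product a * b, built from
// 32-bit halves so every intermediate fits in (well-defined) uint64_t math.
static uint64_t umulhi(uint64_t a, uint64_t b) {
  uint64_t a_lo = a & 0xFFFFFFFFu, a_hi = a >> 32;
  uint64_t b_lo = b & 0xFFFFFFFFu, b_hi = b >> 32;
  uint64_t lo_lo = a_lo * b_lo;
  uint64_t lo_hi = a_lo * b_hi;
  uint64_t hi_lo = a_hi * b_lo;
  uint64_t hi_hi = a_hi * b_hi;
  uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFFu) + lo_hi;  // cannot overflow
  return hi_hi + (hi_lo >> 32) + (cross >> 32);
}

// Math.multiplyHigh semantics: upper 64 bits of the signed 128-bit product.
// The unsigned high word is corrected for negative operands; the final cast
// assumes two's complement representation.
static int64_t multiply_high(int64_t x, int64_t y) {
  uint64_t hi = umulhi((uint64_t)x, (uint64_t)y);
  if (x < 0) hi -= (uint64_t)y;
  if (y < 0) hi -= (uint64_t)x;
  return (int64_t)hi;
}

int main() {
  assert(multiply_high(-1, -1) == 0);                              // (-1) * (-1) = 1
  assert(multiply_high(INT64_MIN, 2) == -1);                       // -2^64: high word is all ones
  assert(multiply_high((int64_t)1 << 32, (int64_t)1 << 32) == 1);  // 2^64: high word is 1
}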
--- a/src/hotspot/share/opto/loopTransform.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/loopTransform.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -666,7 +666,7 @@
   _local_loop_unroll_limit = LoopUnrollLimit;
   _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
-  if (!cl->do_unroll_only()) {
+  if (!cl->is_vectorized_loop()) {
     if (future_unroll_ct > LoopMaxUnroll) return false;
   } else {
     // obey user constraints on vector mapped loops with additional unrolling applied
--- a/src/hotspot/share/opto/loopopts.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/loopopts.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -311,6 +311,7 @@
       }
       return NULL;
     }
+    assert(m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
   }
 
   return n_ctrl;
@@ -615,6 +616,7 @@
   // Now replace all Phis with CMOV's
   Node *cmov_ctrl = iff->in(0);
   uint flip = (lp->Opcode() == Op_IfTrue);
+  Node_List wq;
   while (1) {
     PhiNode* phi = NULL;
     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
@@ -627,17 +629,21 @@
     if (phi == NULL)  break;
     if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
     // Move speculative ops
-    for (uint j = 1; j < region->req(); j++) {
-      Node *proj = region->in(j);
-      Node *inp = phi->in(j);
-      if (get_ctrl(inp) == proj) { // Found local op
+    wq.push(phi);
+    while (wq.size() > 0) {
+      Node *n = wq.pop();
+      for (uint j = 1; j < n->req(); j++) {
+        Node* m = n->in(j);
+        if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
 #ifndef PRODUCT
-        if (PrintOpto && VerifyLoopOptimizations) {
-          tty->print("  speculate: ");
-          inp->dump();
+          if (PrintOpto && VerifyLoopOptimizations) {
+            tty->print("  speculate: ");
+            m->dump();
+          }
+#endif
+          set_ctrl(m, cmov_ctrl);
+          wq.push(m);
         }
-#endif
-        set_ctrl(inp, cmov_ctrl);
       }
     }
     Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
--- a/src/hotspot/share/opto/machnode.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/machnode.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -115,6 +115,18 @@
   ConditionRegister as_ConditionRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
     return ::as_ConditionRegister(reg(ra_, node, idx));
   }
+  VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node) const {
+    return ::as_VectorRegister(reg(ra_, node));
+  }
+  VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+    return ::as_VectorRegister(reg(ra_, node, idx));
+  }
+  VectorSRegister as_VectorSRegister(PhaseRegAlloc *ra_, const Node *node) const {
+    return ::as_VectorSRegister(reg(ra_, node));
+  }
+  VectorSRegister as_VectorSRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+    return ::as_VectorSRegister(reg(ra_, node, idx));
+  }
 #endif
 
   virtual intptr_t  constant() const;
--- a/src/hotspot/share/opto/matcher.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/matcher.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -69,7 +69,7 @@
   _register_save_type(register_save_type),
   _ruleName(ruleName),
   _allocation_started(false),
-  _states_arena(Chunk::medium_size),
+  _states_arena(Chunk::medium_size, mtCompiler),
   _visited(&_states_arena),
   _shared(&_states_arena),
   _dontcare(&_states_arena) {
--- a/src/hotspot/share/opto/memnode.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/memnode.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1771,6 +1771,23 @@
             Opcode() == Op_LoadKlass,
             "Field accesses must be precise" );
     // For klass/static loads, we expect the _type to be precise
+  } else if (tp->base() == Type::RawPtr && adr->is_Load() && off == 0) {
+    /* With mirrors being an indirect in the Klass*
+     * the VM is now using two loads. LoadKlass(LoadP(LoadP(Klass, mirror_offset), zero_offset))
+     * The LoadP from the Klass has a RawPtr type (see LibraryCallKit::load_mirror_from_klass).
+     *
+     * So check the type and klass of the node before the LoadP.
+     */
+    Node* adr2 = adr->in(MemNode::Address);
+    const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
+    if (tkls != NULL && !StressReflectiveCode) {
+      ciKlass* klass = tkls->klass();
+      if (klass->is_loaded() && tkls->klass_is_exact() && tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
+        assert(adr->Opcode() == Op_LoadP, "must load an oop from _java_mirror");
+        assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
+        return TypeInstPtr::make(klass->java_mirror());
+      }
+    }
   }
 
   const TypeKlassPtr *tkls = tp->isa_klassptr();
@@ -1798,12 +1815,6 @@
       }
       const Type* aift = load_array_final_field(tkls, klass);
       if (aift != NULL)  return aift;
-      if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
-        // The field is Klass::_java_mirror.  Return its (constant) value.
-        // (Folds up the 2nd indirection in anObjConstant.getClass().)
-        assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
-        return TypeInstPtr::make(klass->java_mirror());
-      }
     }
 
     // We can still check if we are loading from the primary_supers array at a
@@ -2203,22 +2214,24 @@
   // This improves reflective code, often making the Class
   // mirror go completely dead.  (Current exception:  Class
   // mirrors may appear in debug info, but we could clean them out by
-  // introducing a new debug info operator for Klass*.java_mirror).
+  // introducing a new debug info operator for Klass.java_mirror).
+
   if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
       && offset == java_lang_Class::klass_offset_in_bytes()) {
-    // We are loading a special hidden field from a Class mirror,
-    // the field which points to its Klass or ArrayKlass metaobject.
     if (base->is_Load()) {
-      Node* adr2 = base->in(MemNode::Address);
-      const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
-      if (tkls != NULL && !tkls->empty()
-          && (tkls->klass()->is_instance_klass() ||
+      Node* base2 = base->in(MemNode::Address);
+      if (base2->is_Load()) { /* direct load of a load which is the oophandle */
+        Node* adr2 = base2->in(MemNode::Address);
+        const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
+        if (tkls != NULL && !tkls->empty()
+            && (tkls->klass()->is_instance_klass() ||
               tkls->klass()->is_array_klass())
-          && adr2->is_AddP()
-          ) {
-        int mirror_field = in_bytes(Klass::java_mirror_offset());
-        if (tkls->offset() == mirror_field) {
-          return adr2->in(AddPNode::Base);
+            && adr2->is_AddP()
+           ) {
+          int mirror_field = in_bytes(Klass::java_mirror_offset());
+          if (tkls->offset() == mirror_field) {
+            return adr2->in(AddPNode::Base);
+          }
         }
       }
     }
--- a/src/hotspot/share/opto/subnode.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/subnode.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -877,8 +877,8 @@
 }
 
 static inline Node* isa_java_mirror_load(PhaseGVN* phase, Node* n) {
-  // Return the klass node for
-  //   LoadP(AddP(foo:Klass, #java_mirror))
+  // Return the klass node for (indirect load from OopHandle)
+  //   LoadP(LoadP(AddP(foo:Klass, #java_mirror)))
   //   or NULL if not matching.
   if (n->Opcode() != Op_LoadP) return NULL;
 
@@ -886,6 +886,10 @@
   if (!tp || tp->klass() != phase->C->env()->Class_klass()) return NULL;
 
   Node* adr = n->in(MemNode::Address);
+  // First load from OopHandle
+  if (adr->Opcode() != Op_LoadP || !phase->type(adr)->isa_rawptr()) return NULL;
+  adr = adr->in(MemNode::Address);
+
   intptr_t off = 0;
   Node* k = AddPNode::Ideal_base_and_offset(adr, phase, off);
   if (k == NULL)  return NULL;
--- a/src/hotspot/share/opto/superword.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/superword.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -145,6 +145,8 @@
   // Skip any loops already optimized by slp
   if (cl->is_vectorized_loop()) return;
 
+  if (cl->do_unroll_only()) return;
+
   if (cl->is_main_loop()) {
     // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
     CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
@@ -2163,7 +2165,15 @@
 //------------------------------output---------------------------
 // Convert packs into vector node operations
 void SuperWord::output() {
-  if (_packset.length() == 0) return;
+  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
+  Compile* C = _phase->C;
+  if (_packset.length() == 0) {
+    // Instigate more unrolling for optimization when vectorization fails.
+    C->set_major_progress();
+    cl->set_notpassed_slp();
+    cl->mark_do_unroll_only();
+    return;
+  }
 
 #ifndef PRODUCT
   if (TraceLoopOpts) {
@@ -2172,7 +2182,6 @@
   }
 #endif
 
-  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
   if (cl->is_main_loop()) {
     // MUST ENSURE main loop's initial value is properly aligned:
     //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
@@ -2185,7 +2194,6 @@
     }
   }
 
-  Compile* C = _phase->C;
   uint max_vlen_in_bytes = 0;
   uint max_vlen = 0;
   bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
@@ -4493,4 +4501,3 @@
 
   return true;
 }
-
--- a/src/hotspot/share/opto/type.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/opto/type.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -67,7 +67,13 @@
   { Bad,             T_ILLEGAL,    "vectorx:",      false, 0,                    relocInfo::none          },  // VectorX
   { Bad,             T_ILLEGAL,    "vectory:",      false, 0,                    relocInfo::none          },  // VectorY
   { Bad,             T_ILLEGAL,    "vectorz:",      false, 0,                    relocInfo::none          },  // VectorZ
-#elif defined(PPC64) || defined(S390)
+#elif defined(PPC64)
+  { Bad,             T_ILLEGAL,    "vectors:",      false, 0,                    relocInfo::none          },  // VectorS
+  { Bad,             T_ILLEGAL,    "vectord:",      false, Op_RegL,              relocInfo::none          },  // VectorD
+  { Bad,             T_ILLEGAL,    "vectorx:",      false, Op_VecX,              relocInfo::none          },  // VectorX
+  { Bad,             T_ILLEGAL,    "vectory:",      false, 0,                    relocInfo::none          },  // VectorY
+  { Bad,             T_ILLEGAL,    "vectorz:",      false, 0,                    relocInfo::none          },  // VectorZ
+#elif defined(S390)
   { Bad,             T_ILLEGAL,    "vectors:",      false, 0,                    relocInfo::none          },  // VectorS
   { Bad,             T_ILLEGAL,    "vectord:",      false, Op_RegL,              relocInfo::none          },  // VectorD
   { Bad,             T_ILLEGAL,    "vectorx:",      false, 0,                    relocInfo::none          },  // VectorX
--- a/src/hotspot/share/prims/jvmtiRedefineClasses.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/prims/jvmtiRedefineClasses.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -158,6 +158,11 @@
         ClassLoaderData* cld = _scratch_classes[i]->class_loader_data();
         // Free the memory for this class at class unloading time.  Not before
         // because CMS might think this is still live.
+        InstanceKlass* ik = get_ik(_class_defs[i].klass);
+        if (ik->get_cached_class_file() == _scratch_classes[i]->get_cached_class_file()) {
+          // Don't double-free cached_class_file copied from the original class if error.
+          _scratch_classes[i]->set_cached_class_file(NULL);
+        }
         cld->add_to_deallocate_list(InstanceKlass::cast(_scratch_classes[i]));
       }
     }
@@ -3946,12 +3951,12 @@
   // with them was cached on the scratch class, move to the_class.
   // Note: we still want to do this if nothing needed caching since it
   // should get cleared in the_class too.
-  if (the_class->get_cached_class_file_bytes() == 0) {
+  if (the_class->get_cached_class_file() == 0) {
     // the_class doesn't have a cache yet so copy it
     the_class->set_cached_class_file(scratch_class->get_cached_class_file());
   }
-  else if (scratch_class->get_cached_class_file_bytes() !=
-           the_class->get_cached_class_file_bytes()) {
+  else if (scratch_class->get_cached_class_file() !=
+           the_class->get_cached_class_file()) {
     // The same class can be present twice in the scratch classes list or there
     // are multiple concurrent RetransformClasses calls on different threads.
     // In such cases we have to deallocate scratch_class cached_class_file.
--- a/src/hotspot/share/prims/jvmtiTagMap.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/prims/jvmtiTagMap.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -3026,8 +3026,7 @@
   // Preloaded classes and loader from the system dictionary
   blk.set_kind(JVMTI_HEAP_REFERENCE_SYSTEM_CLASS);
   SystemDictionary::always_strong_oops_do(&blk);
-  KlassToOopClosure klass_blk(&blk);
-  ClassLoaderDataGraph::always_strong_oops_do(&blk, &klass_blk, false);
+  ClassLoaderDataGraph::always_strong_oops_do(&blk, false);
   if (blk.stopped()) {
     return false;
   }
--- a/src/hotspot/share/prims/jvmtiThreadState.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/prims/jvmtiThreadState.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -411,21 +411,21 @@
  private:
   JvmtiThreadState* _state;
   Klass*            _scratch_class;
-  Handle            _scratch_mirror;
+  OopHandle         _scratch_mirror;
 
  public:
   RedefineVerifyMark(Klass* the_class, Klass* scratch_class,
                      JvmtiThreadState *state) : _state(state), _scratch_class(scratch_class)
   {
     _state->set_class_versions_map(the_class, scratch_class);
-    _scratch_mirror = Handle(Thread::current(), _scratch_class->java_mirror());
-    _scratch_class->set_java_mirror(the_class->java_mirror());
+    _scratch_mirror = _scratch_class->java_mirror_handle();
+    _scratch_class->set_java_mirror_handle(the_class->java_mirror_handle());
   }
 
   ~RedefineVerifyMark() {
     // Restore the scratch class's mirror, so when scratch_class is removed
     // the correct mirror pointing to it can be cleared.
-    _scratch_class->set_java_mirror(_scratch_mirror());
+    _scratch_class->set_java_mirror_handle(_scratch_mirror);
     _state->clear_class_versions_map();
   }
 };
--- a/src/hotspot/share/runtime/arguments.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/arguments.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -377,6 +377,7 @@
   // --- Non-alias flags - sorted by obsolete_in then expired_in:
   { "MaxGCMinorPauseMillis",        JDK_Version::jdk(8), JDK_Version::undefined(), JDK_Version::undefined() },
   { "UseConcMarkSweepGC",           JDK_Version::jdk(9), JDK_Version::undefined(), JDK_Version::undefined() },
+  { "AssumeMP",                     JDK_Version::jdk(10),JDK_Version::undefined(), JDK_Version::undefined() },
   { "MonitorInUseLists",            JDK_Version::jdk(10),JDK_Version::undefined(), JDK_Version::undefined() },
   { "MaxRAMFraction",               JDK_Version::jdk(10),  JDK_Version::undefined(), JDK_Version::undefined() },
   { "MinRAMFraction",               JDK_Version::jdk(10),  JDK_Version::undefined(), JDK_Version::undefined() },
@@ -4476,16 +4477,6 @@
 
   set_shared_spaces_flags();
 
-#if defined(SPARC)
-  // BIS instructions require 'membar' instruction regardless of the number
-  // of CPUs because in virtualized/container environments which might use only 1
-  // CPU, BIS instructions may produce incorrect results.
-
-  if (FLAG_IS_DEFAULT(AssumeMP)) {
-    FLAG_SET_DEFAULT(AssumeMP, true);
-  }
-#endif
-
   // Check the GC selections again.
   if (!check_gc_consistency()) {
     return JNI_EINVAL;
--- a/src/hotspot/share/runtime/atomic.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/atomic.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -44,7 +44,7 @@
 };
 
 class Atomic : AllStatic {
- public:
+public:
   // Atomic operations on jlong types are not available on all 32-bit
   // platforms. If atomic ops on jlongs are defined here they must only
   // be used from code that verifies they are available at runtime and
@@ -64,24 +64,25 @@
   // we can prove that a weaker form is sufficiently safe.
 
   // Atomically store to a location
-  inline static void store    (jbyte    store_value, jbyte*    dest);
-  inline static void store    (jshort   store_value, jshort*   dest);
-  inline static void store    (jint     store_value, jint*     dest);
-  // See comment above about using jlong atomics on 32-bit platforms
-  inline static void store    (jlong    store_value, jlong*    dest);
-  inline static void store_ptr(intptr_t store_value, intptr_t* dest);
-  inline static void store_ptr(void*    store_value, void*     dest);
+  // The type T must be either a pointer type convertible to or equal
+  // to D, an integral/enum type equal to D, or a type equal to D that
+  // is primitive convertible using PrimitiveConversions.
+  template<typename T, typename D>
+  inline static void store(T store_value, volatile D* dest);
+
+  inline static void store_ptr(intptr_t store_value, volatile intptr_t* dest) {
+    Atomic::store(store_value, dest);
+  }
 
-  inline static void store    (jbyte    store_value, volatile jbyte*    dest);
-  inline static void store    (jshort   store_value, volatile jshort*   dest);
-  inline static void store    (jint     store_value, volatile jint*     dest);
-  // See comment above about using jlong atomics on 32-bit platforms
-  inline static void store    (jlong    store_value, volatile jlong*    dest);
-  inline static void store_ptr(intptr_t store_value, volatile intptr_t* dest);
-  inline static void store_ptr(void*    store_value, volatile void*     dest);
+  inline static void store_ptr(void*    store_value, volatile void*     dest) {
+    Atomic::store(store_value, reinterpret_cast<void* volatile*>(dest));
+  }
 
-  // See comment above about using jlong atomics on 32-bit platforms
-  inline static jlong load(const volatile jlong* src);
+  // Atomically load from a location
+  // The type T must be either a pointer type, an integral/enum type,
+  // or a type that is primitive convertible using PrimitiveConversions.
+  template<typename T>
+  inline static T load(const volatile T* dest);
 
   // Atomically add to a location. Returns updated value. add*() provide:
   // <fence> add-value-to-dest <membar StoreLoad|StoreStore>
@@ -97,29 +98,38 @@
     return add(add_value, reinterpret_cast<char* volatile*>(dest));
   }
 
-  // Atomically increment location. inc*() provide:
+  // Atomically increment location. inc() provide:
   // <fence> increment-dest <membar StoreLoad|StoreStore>
-  inline static void inc    (volatile jint*     dest);
-  inline static void inc    (volatile jshort*   dest);
-  inline static void inc    (volatile size_t*   dest);
-  inline static void inc_ptr(volatile intptr_t* dest);
-  inline static void inc_ptr(volatile void*     dest);
+  // The type D may be either a pointer type, or an integral
+  // type. If it is a pointer type, then the increment is
+  // scaled to the size of the type pointed to by the pointer.
+  template<typename D>
+  inline static void inc(D volatile* dest);
 
-  // Atomically decrement a location. dec*() provide:
+  // Atomically decrement a location. dec() provide:
   // <fence> decrement-dest <membar StoreLoad|StoreStore>
-  inline static void dec    (volatile jint*     dest);
-  inline static void dec    (volatile jshort*   dest);
-  inline static void dec    (volatile size_t*   dest);
-  inline static void dec_ptr(volatile intptr_t* dest);
-  inline static void dec_ptr(volatile void*     dest);
+  // The type D may be either a pointer type, or an integral
+  // type. If it is a pointer type, then the decrement is
+  // scaled to the size of the type pointed to by the pointer.
+  template<typename D>
+  inline static void dec(D volatile* dest);
 
   // Performs atomic exchange of *dest with exchange_value. Returns old
   // prior value of *dest. xchg*() provide:
   // <fence> exchange-value-with-dest <membar StoreLoad|StoreStore>
-  inline static jint         xchg    (jint         exchange_value, volatile jint*         dest);
-  inline static unsigned int xchg    (unsigned int exchange_value, volatile unsigned int* dest);
-  inline static intptr_t     xchg_ptr(intptr_t     exchange_value, volatile intptr_t*     dest);
-  inline static void*        xchg_ptr(void*        exchange_value, volatile void*         dest);
+  // The type T must be either a pointer type convertible to or equal
+  // to D, an integral/enum type equal to D, or a type equal to D that
+  // is primitive convertible using PrimitiveConversions.
+  template<typename T, typename D>
+  inline static D xchg(T exchange_value, volatile D* dest);
+
+  inline static intptr_t xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+    return xchg(exchange_value, dest);
+  }
+
+  inline static void*    xchg_ptr(void*    exchange_value, volatile void*     dest) {
+    return xchg(exchange_value, reinterpret_cast<void* volatile*>(dest));
+  }
 
   // Performs atomic compare of *dest and compare_value, and exchanges
   // *dest with exchange_value if the comparison succeeded. Returns prior
@@ -165,6 +175,59 @@
   // that is needed here.
   template<typename From, typename To> struct IsPointerConvertible;
 
+protected:
+  // Dispatch handler for store.  Provides type-based validity
+  // checking and limited conversions around calls to the platform-
+  // specific implementation layer provided by PlatformOp.
+  template<typename T, typename D, typename PlatformOp, typename Enable = void>
+  struct StoreImpl;
+
+  // Platform-specific implementation of store.  Support for sizes
+  // of 1, 2, 4, and (if different) pointer size bytes are required.
+  // The class is a function object that must be default constructable,
+  // with these requirements:
+  //
+  // either:
+  // - dest is of type D*, an integral, enum or pointer type.
+  // - new_value are of type T, an integral, enum or pointer type D or
+  //   pointer type convertible to D.
+  // or:
+  // - T and D are the same and are primitive convertible using PrimitiveConversions
+  // and either way:
+  // - platform_store is an object of type PlatformStore<sizeof(T)>.
+  //
+  // Then
+  //   platform_store(new_value, dest)
+  // must be a valid expression.
+  //
+  // The default implementation is a volatile store. If a platform
+  // requires more for e.g. 64 bit stores, a specialization is required
+  template<size_t byte_size> struct PlatformStore;
+
+  // Dispatch handler for load.  Provides type-based validity
+  // checking and limited conversions around calls to the platform-
+  // specific implementation layer provided by PlatformOp.
+  template<typename T, typename PlatformOp, typename Enable = void>
+  struct LoadImpl;
+
+  // Platform-specific implementation of load. Support for sizes of
+  // 1, 2, 4 bytes and (if different) pointer size bytes are required.
+  // The class is a function object that must be default
+  // constructable, with these requirements:
+  //
+  // - dest is of type T*, an integral, enum or pointer type, or
+  //   T is convertible to a primitive type using PrimitiveConversions
+  // - platform_load is an object of type PlatformLoad<sizeof(T)>.
+  //
+  // Then
+  //   platform_load(src)
+  // must be a valid expression, returning a result convertible to T.
+  //
+  // The default implementation is a volatile load. If a platform
+  // requires more for e.g. 64 bit loads, a specialization is required
+  template<size_t byte_size> struct PlatformLoad;
+
+private:
   // Dispatch handler for add.  Provides type-based validity checking
   // and limited conversions around calls to the platform-specific
   // implementation layer provided by PlatformAdd.
@@ -280,6 +343,45 @@
 public: // Temporary, can't be private: C++03 11.4/2. Fixed by C++11.
   struct CmpxchgByteUsingInt;
 private:
+
+  // Dispatch handler for xchg.  Provides type-based validity
+  // checking and limited conversions around calls to the
+  // platform-specific implementation layer provided by
+  // PlatformXchg.
+  template<typename T, typename D, typename Enable = void>
+  struct XchgImpl;
+
+  // Platform-specific implementation of xchg.  Support for sizes
+  // of 4, and sizeof(intptr_t) are required.  The class is a function
+  // object that must be default constructable, with these requirements:
+  //
+  // - dest is of type T*.
+  // - exchange_value is of type T.
+  // - platform_xchg is an object of type PlatformXchg<sizeof(T)>.
+  //
+  // Then
+  //   platform_xchg(exchange_value, dest)
+  // must be a valid expression, returning a result convertible to T.
+  //
+  // A default definition is provided, which declares a function template
+  //   T operator()(T, T volatile*, T, cmpxchg_memory_order) const
+  //
+  // For each required size, a platform must either provide an
+  // appropriate definition of that function, or must entirely
+  // specialize the class template for that size.
+  template<size_t byte_size> struct PlatformXchg;
+
+  // Support for platforms that implement some variants of xchg
+  // using a (typically out of line) non-template helper function.
+  // The generic arguments passed to PlatformXchg need to be
+  // translated to the appropriate type for the helper function, the
+  // helper invoked on the translated arguments, and the result
+  // translated back.  Type is the parameter / return type of the
+  // helper function.
+  template<typename Type, typename Fn, typename T>
+  static T xchg_using_helper(Fn fn,
+                             T exchange_value,
+                             T volatile* dest);
 };
 
 template<typename From, typename To>
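
The declarations above (StoreImpl, LoadImpl, XchgImpl plus the per-size Platform* functors) describe a two-layer dispatch: a templated front end that type-checks the arguments, and a size-selected functor that each platform may specialize. A standalone C++11 sketch of that shape for load, with std::enable_if standing in for HotSpot's EnableIf and a plain volatile read standing in for the platform layer:

#include <cstddef>
#include <type_traits>

// Size-classed "platform" functor; a real platform would specialize this
// per byte_size (for example, 64-bit loads on a 32-bit target).
template<size_t byte_size>
struct PlatformLoadSketch {
  template<typename T>
  T operator()(T const volatile* src) const {
    static_assert(sizeof(T) <= sizeof(void*), "wide loads need a specialization");
    return *src;
  }
};

// Front end: only enabled for integral, enum and pointer types, mirroring the
// EnableIf guard on Atomic::LoadImpl; other types would have to go through a
// Translate-style bit conversion first.
template<typename T>
typename std::enable_if<std::is_integral<T>::value ||
                        std::is_enum<T>::value ||
                        std::is_pointer<T>::value, T>::type
load_sketch(T const volatile* src) {
  return PlatformLoadSketch<sizeof(T)>()(src);
}

int main() {
  volatile int counter = 42;
  int v = load_sketch(&counter);
  return v == 42 ? 0 : 1;
}

The default functor's size check mirrors the PlatformLoad comment above: anything wider than a pointer has to come from a platform specialization rather than the plain volatile read.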
@@ -296,6 +398,131 @@
   static const bool value = (sizeof(yes) == sizeof(test(test_value)));
 };
 
+// Handle load for pointer, integral and enum types.
+template<typename T, typename PlatformOp>
+struct Atomic::LoadImpl<
+  T,
+  PlatformOp,
+  typename EnableIf<IsIntegral<T>::value || IsRegisteredEnum<T>::value || IsPointer<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T const volatile* dest) const {
+    // Forward to the platform handler for the size of T.
+    return PlatformOp()(dest);
+  }
+};
+
+// Handle load for types that have a translator.
+//
+// All the involved types must be identical.
+//
+// This translates the original call into a call on the decayed
+// arguments, and returns the recovered result of that translated
+// call.
+template<typename T, typename PlatformOp>
+struct Atomic::LoadImpl<
+  T,
+  PlatformOp,
+  typename EnableIf<PrimitiveConversions::Translate<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T const volatile* dest) const {
+    typedef PrimitiveConversions::Translate<T> Translator;
+    typedef typename Translator::Decayed Decayed;
+    STATIC_ASSERT(sizeof(T) == sizeof(Decayed));
+    Decayed result = PlatformOp()(reinterpret_cast<Decayed const volatile*>(dest));
+    return Translator::recover(result);
+  }
+};
+
+// Default implementation of atomic load if a specific platform
+// does not provide a specialization for a certain size class.
+// For increased safety, the default implementation only allows
+// load types that are pointer sized or smaller. If a platform still
+// supports wide atomics, then it has to use specialization
+// of Atomic::PlatformLoad for that wider size class.
+template<size_t byte_size>
+struct Atomic::PlatformLoad VALUE_OBJ_CLASS_SPEC {
+  template<typename T>
+  T operator()(T const volatile* dest) const {
+    STATIC_ASSERT(sizeof(T) <= sizeof(void*)); // wide atomics need specialization
+    return *dest;
+  }
+};
+
+// Handle store for integral and enum types.
+//
+// All the involved types must be identical.
+template<typename T, typename PlatformOp>
+struct Atomic::StoreImpl<
+  T, T,
+  PlatformOp,
+  typename EnableIf<IsIntegral<T>::value || IsRegisteredEnum<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  void operator()(T new_value, T volatile* dest) const {
+    // Forward to the platform handler for the size of T.
+    PlatformOp()(new_value, dest);
+  }
+};
+
+// Handle store for pointer types.
+//
+// The new_value must be implicitly convertible to the
+// destination's type; it must be type-correct to store the
+// new_value in the destination.
+template<typename T, typename D, typename PlatformOp>
+struct Atomic::StoreImpl<
+  T*, D*,
+  PlatformOp,
+  typename EnableIf<Atomic::IsPointerConvertible<T*, D*>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  void operator()(T* new_value, D* volatile* dest) const {
+    // Allow derived to base conversion, and adding cv-qualifiers.
+    D* value = new_value;
+    PlatformOp()(value, dest);
+  }
+};
+
+// Handle store for types that have a translator.
+//
+// All the involved types must be identical.
+//
+// This translates the original call into a call on the decayed
+// arguments.
+template<typename T, typename PlatformOp>
+struct Atomic::StoreImpl<
+  T, T,
+  PlatformOp,
+  typename EnableIf<PrimitiveConversions::Translate<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  void operator()(T new_value, T volatile* dest) const {
+    typedef PrimitiveConversions::Translate<T> Translator;
+    typedef typename Translator::Decayed Decayed;
+    STATIC_ASSERT(sizeof(T) == sizeof(Decayed));
+    PlatformOp()(Translator::decay(new_value),
+                 reinterpret_cast<Decayed volatile*>(dest));
+  }
+};
+
+// Default implementation of atomic store if a specific platform
+// does not provide a specialization for a certain size class.
+// For increased safety, the default implementation only allows
+// storing types that are pointer sized or smaller. If a platform still
+// supports wide atomics, then it has to use specialization
+// of Atomic::PlatformStore for that wider size class.
+template<size_t byte_size>
+struct Atomic::PlatformStore VALUE_OBJ_CLASS_SPEC {
+  template<typename T>
+  void operator()(T new_value,
+                  T volatile* dest) const {
+    STATIC_ASSERT(sizeof(T) <= sizeof(void*)); // wide atomics need specialization
+    (void)const_cast<T&>(*dest = new_value);
+  }
+};
+
 // Define FetchAndAdd and AddAndFetch helper classes before including
 // platform file, which may use these as base classes, requiring they
 // be complete.
@@ -312,6 +539,22 @@
   D operator()(I add_value, D volatile* dest) const;
 };
 
+template<typename D>
+inline void Atomic::inc(D volatile* dest) {
+  STATIC_ASSERT(IsPointer<D>::value || IsIntegral<D>::value);
+  typedef typename Conditional<IsPointer<D>::value, ptrdiff_t, D>::type I;
+  Atomic::add(I(1), dest);
+}
+
+template<typename D>
+inline void Atomic::dec(D volatile* dest) {
+  STATIC_ASSERT(IsPointer<D>::value || IsIntegral<D>::value);
+  typedef typename Conditional<IsPointer<D>::value, ptrdiff_t, D>::type I;
+  // Assumes two's complement integer representation.
+  #pragma warning(suppress: 4146)
+  Atomic::add(I(-1), dest);
+}
+
 // Define the class before including platform file, which may specialize
 // the operator definition.  No generic definition of specializations
 // of the operator template are provided, nor are there any generic
@@ -337,6 +580,18 @@
                cmpxchg_memory_order order) const;
 };
 
+// Define the class before including platform file, which may specialize
+// the operator definition.  No generic definition of specializations
+// of the operator template are provided, nor are there any generic
+// specializations of the class.  The platform file is responsible for
+// providing those.
+template<size_t byte_size>
+struct Atomic::PlatformXchg VALUE_OBJ_CLASS_SPEC {
+  template<typename T>
+  T operator()(T exchange_value,
+               T volatile* dest) const;
+};
+
 // platform specific in-line definitions - must come before shared definitions
 
 #include OS_CPU_HEADER(atomic)
@@ -348,6 +603,16 @@
 #error size_t is not WORD_SIZE, interesting platform, but missing implementation here
 #endif
 
+template<typename T>
+inline T Atomic::load(const volatile T* dest) {
+  return LoadImpl<T, PlatformLoad<sizeof(T)> >()(dest);
+}
+
+template<typename T, typename D>
+inline void Atomic::store(T store_value, volatile D* dest) {
+  StoreImpl<T, D, PlatformStore<sizeof(D)> >()(store_value, dest);
+}
+
 template<typename I, typename D>
 inline D Atomic::add(I add_value, D volatile* dest) {
   return AddImpl<I, D>()(add_value, dest);
@@ -437,14 +702,6 @@
        reinterpret_cast<Type volatile*>(dest)));
 }
 
-inline void Atomic::inc(volatile size_t* dest) {
-  inc_ptr((volatile intptr_t*) dest);
-}
-
-inline void Atomic::dec(volatile size_t* dest) {
-  dec_ptr((volatile intptr_t*) dest);
-}
-
 template<typename T, typename D, typename U>
 inline D Atomic::cmpxchg(T exchange_value,
                          D volatile* dest,
@@ -586,17 +843,75 @@
   return PrimitiveConversions::cast<T>(cur_as_bytes[offset]);
 }
 
-inline unsigned Atomic::xchg(unsigned int exchange_value, volatile unsigned int* dest) {
-  assert(sizeof(unsigned int) == sizeof(jint), "more work to do");
-  return (unsigned int)Atomic::xchg((jint)exchange_value, (volatile jint*)dest);
+// Handle xchg for integral and enum types.
+//
+// All the involved types must be identical.
+template<typename T>
+struct Atomic::XchgImpl<
+  T, T,
+  typename EnableIf<IsIntegral<T>::value || IsRegisteredEnum<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T exchange_value, T volatile* dest) const {
+    // Forward to the platform handler for the size of T.
+    return PlatformXchg<sizeof(T)>()(exchange_value, dest);
+  }
+};
+
+// Handle xchg for pointer types.
+//
+// The exchange_value must be implicitly convertible to the
+// destination's type; it must be type-correct to store the
+// exchange_value in the destination.
+template<typename T, typename D>
+struct Atomic::XchgImpl<
+  T*, D*,
+  typename EnableIf<Atomic::IsPointerConvertible<T*, D*>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  D* operator()(T* exchange_value, D* volatile* dest) const {
+    // Allow derived to base conversion, and adding cv-qualifiers.
+    D* new_value = exchange_value;
+    return PlatformXchg<sizeof(D*)>()(new_value, dest);
+  }
+};
+
+// Handle xchg for types that have a translator.
+//
+// All the involved types must be identical.
+//
+// This translates the original call into a call on the decayed
+// arguments, and returns the recovered result of that translated
+// call.
+template<typename T>
+struct Atomic::XchgImpl<
+  T, T,
+  typename EnableIf<PrimitiveConversions::Translate<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T exchange_value, T volatile* dest) const {
+    typedef PrimitiveConversions::Translate<T> Translator;
+    typedef typename Translator::Decayed Decayed;
+    STATIC_ASSERT(sizeof(T) == sizeof(Decayed));
+    return Translator::recover(
+      xchg(Translator::decay(exchange_value),
+           reinterpret_cast<Decayed volatile*>(dest)));
+  }
+};
+
+template<typename Type, typename Fn, typename T>
+inline T Atomic::xchg_using_helper(Fn fn,
+                                   T exchange_value,
+                                   T volatile* dest) {
+  STATIC_ASSERT(sizeof(Type) == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    fn(PrimitiveConversions::cast<Type>(exchange_value),
+       reinterpret_cast<Type volatile*>(dest)));
 }
 
-inline void Atomic::inc(volatile jshort* dest) {
-  (void)add(jshort(1), dest);
-}
-
-inline void Atomic::dec(volatile jshort* dest) {
-  (void)add(jshort(-1), dest);
+template<typename T, typename D>
+inline D Atomic::xchg(T exchange_value, volatile D* dest) {
+  return XchgImpl<T, D>()(exchange_value, dest);
 }
 
 #endif // SHARE_VM_RUNTIME_ATOMIC_HPP
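
Illustration only, not part of this changeset: a minimal sketch of how call sites can use the templated Atomic entry points above. The Method class and all names below are invented for the example; the size_t, jshort and unsigned overloads removed above are subsumed by these templates.

    #include "runtime/atomic.hpp"

    class Method;                                // any pointee type works
    static volatile size_t  _allocations = 0;
    static Method* volatile _last_method = NULL;

    inline void record(Method* m) {
      Atomic::inc(&_allocations);                // integral path: Atomic::add(size_t(1), dest)
      Atomic::store(m, &_last_method);           // StoreImpl -> PlatformStore<sizeof(Method*)>
    }

    inline size_t allocations() {
      return Atomic::load(&_allocations);        // LoadImpl -> PlatformLoad<sizeof(size_t)>
    }

    inline Method* swap_last(Method* m) {
      return Atomic::xchg(m, &_last_method);     // XchgImpl pointer path -> PlatformXchg<sizeof(Method*)>
    }
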
--- a/src/hotspot/share/runtime/globals.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/globals.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -592,8 +592,8 @@
           range(8, 256)                                                     \
           constraint(ObjectAlignmentInBytesConstraintFunc,AtParse)          \
                                                                             \
-  product(bool, AssumeMP, false,                                            \
-          "Instruct the VM to assume multiple processors are available")    \
+  product(bool, AssumeMP, true,                                             \
+          "(Deprecated) Instruct the VM to assume multiple processors are available")\
                                                                             \
   /* UseMembar is theoretically a temp flag used for memory barrier      */ \
   /* removal testing.  It was supposed to be removed before FCS but has  */ \
@@ -2344,12 +2344,6 @@
           range(30*K, max_uintx/BytesPerWord)                               \
           constraint(InitialBootClassLoaderMetaspaceSizeConstraintFunc, AfterErgo)\
                                                                             \
-  product(bool, TraceYoungGenTime, false,                                   \
-          "Trace accumulated time for young collection")                    \
-                                                                            \
-  product(bool, TraceOldGenTime, false,                                     \
-          "Trace accumulated time for old collection")                      \
-                                                                            \
   product(bool, PrintHeapAtSIGBREAK, true,                                  \
           "Print heap layout in response to SIGBREAK")                      \
                                                                             \
--- a/src/hotspot/share/runtime/mutex.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/mutex.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -526,7 +526,7 @@
   // Note that the OrderAccess::storeload() fence that appears after unlock store
   // provides for progress conditions and succession and is _not related to exclusion
   // safety or lock release consistency.
-  OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], 0); // drop outer lock
+  OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], jbyte(0)); // drop outer lock
 
   OrderAccess::storeload();
   ParkEvent * const w = _OnDeck; // raw load as we will just return if non-NULL
--- a/src/hotspot/share/runtime/orderAccess.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/orderAccess.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -26,6 +26,7 @@
 #define SHARE_VM_RUNTIME_ORDERACCESS_HPP
 
 #include "memory/allocation.hpp"
+#include "runtime/atomic.hpp"
 
 //                Memory Access Ordering Model
 //
@@ -252,7 +253,7 @@
   void postfix() { ScopedFenceGeneral<T>::postfix(); }
 };
 
-class OrderAccess : AllStatic {
+class OrderAccess : private Atomic {
  public:
   // barriers
   static void     loadload();
@@ -264,44 +265,20 @@
   static void     release();
   static void     fence();
 
-  static jbyte    load_acquire(const volatile jbyte*   p);
-  static jshort   load_acquire(const volatile jshort*  p);
-  static jint     load_acquire(const volatile jint*    p);
-  static jlong    load_acquire(const volatile jlong*   p);
-  static jubyte   load_acquire(const volatile jubyte*  p);
-  static jushort  load_acquire(const volatile jushort* p);
-  static juint    load_acquire(const volatile juint*   p);
-  static julong   load_acquire(const volatile julong*  p);
-  static jfloat   load_acquire(const volatile jfloat*  p);
-  static jdouble  load_acquire(const volatile jdouble* p);
+  template <typename T>
+  static T        load_acquire(const volatile T* p);
 
   static intptr_t load_ptr_acquire(const volatile intptr_t* p);
   static void*    load_ptr_acquire(const volatile void*     p);
 
-  static void     release_store(volatile jbyte*   p, jbyte   v);
-  static void     release_store(volatile jshort*  p, jshort  v);
-  static void     release_store(volatile jint*    p, jint    v);
-  static void     release_store(volatile jlong*   p, jlong   v);
-  static void     release_store(volatile jubyte*  p, jubyte  v);
-  static void     release_store(volatile jushort* p, jushort v);
-  static void     release_store(volatile juint*   p, juint   v);
-  static void     release_store(volatile julong*  p, julong  v);
-  static void     release_store(volatile jfloat*  p, jfloat  v);
-  static void     release_store(volatile jdouble* p, jdouble v);
+  template <typename T, typename D>
+  static void     release_store(volatile D* p, T v);
 
   static void     release_store_ptr(volatile intptr_t* p, intptr_t v);
   static void     release_store_ptr(volatile void*     p, void*    v);
 
-  static void     release_store_fence(volatile jbyte*   p, jbyte   v);
-  static void     release_store_fence(volatile jshort*  p, jshort  v);
-  static void     release_store_fence(volatile jint*    p, jint    v);
-  static void     release_store_fence(volatile jlong*   p, jlong   v);
-  static void     release_store_fence(volatile jubyte*  p, jubyte  v);
-  static void     release_store_fence(volatile jushort* p, jushort v);
-  static void     release_store_fence(volatile juint*   p, juint   v);
-  static void     release_store_fence(volatile julong*  p, julong  v);
-  static void     release_store_fence(volatile jfloat*  p, jfloat  v);
-  static void     release_store_fence(volatile jdouble* p, jdouble v);
+  template <typename T, typename D>
+  static void     release_store_fence(volatile D* p, T v);
 
   static void     release_store_ptr_fence(volatile intptr_t* p, intptr_t v);
   static void     release_store_ptr_fence(volatile void*     p, void*    v);
@@ -313,45 +290,34 @@
   static void StubRoutines_fence();
 
   // Give platforms a variation point to specialize.
-  template<typename T> static T    specialized_load_acquire       (const volatile T* p);
-  template<typename T> static void specialized_release_store      (volatile T* p, T v);
-  template<typename T> static void specialized_release_store_fence(volatile T* p, T v);
+  template<size_t byte_size, ScopedFenceType type> struct PlatformOrderedStore;
+  template<size_t byte_size, ScopedFenceType type> struct PlatformOrderedLoad;
 
   template<typename FieldType, ScopedFenceType FenceType>
   static void ordered_store(volatile FieldType* p, FieldType v);
 
   template<typename FieldType, ScopedFenceType FenceType>
   static FieldType ordered_load(const volatile FieldType* p);
+};
 
-  static void    store(volatile jbyte*   p, jbyte   v);
-  static void    store(volatile jshort*  p, jshort  v);
-  static void    store(volatile jint*    p, jint    v);
-  static void    store(volatile jlong*   p, jlong   v);
-  static void    store(volatile jdouble* p, jdouble v);
-  static void    store(volatile jfloat*  p, jfloat  v);
-
-  static jbyte   load(const volatile jbyte*   p);
-  static jshort  load(const volatile jshort*  p);
-  static jint    load(const volatile jint*    p);
-  static jlong   load(const volatile jlong*   p);
-  static jdouble load(const volatile jdouble* p);
-  static jfloat  load(const volatile jfloat*  p);
+// The following methods can be specialized using simple template specialization
+// in the platform specific files for optimization purposes. Otherwise the
+// generalized variant is used.
 
-  // The following store_fence methods are deprecated and will be removed
-  // when all repos conform to the new generalized OrderAccess.
-  static void    store_fence(jbyte*   p, jbyte   v);
-  static void    store_fence(jshort*  p, jshort  v);
-  static void    store_fence(jint*    p, jint    v);
-  static void    store_fence(jlong*   p, jlong   v);
-  static void    store_fence(jubyte*  p, jubyte  v);
-  static void    store_fence(jushort* p, jushort v);
-  static void    store_fence(juint*   p, juint   v);
-  static void    store_fence(julong*  p, julong  v);
-  static void    store_fence(jfloat*  p, jfloat  v);
-  static void    store_fence(jdouble* p, jdouble v);
+template<size_t byte_size, ScopedFenceType type>
+struct OrderAccess::PlatformOrderedStore VALUE_OBJ_CLASS_SPEC {
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    ordered_store<T, type>(p, v);
+  }
+};
 
-  static void    store_ptr_fence(intptr_t* p, intptr_t v);
-  static void    store_ptr_fence(void**    p, void*    v);
+template<size_t byte_size, ScopedFenceType type>
+struct OrderAccess::PlatformOrderedLoad VALUE_OBJ_CLASS_SPEC {
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    return ordered_load<T, type>(p);
+  }
 };
 
 #endif // SHARE_VM_RUNTIME_ORDERACCESS_HPP
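
Illustration only, not from this changeset: with the specialized_* functions gone, a platform os_cpu header specializes the PlatformOrderedStore/PlatformOrderedLoad function objects instead. A hypothetical specialization using a GCC-style builtin; a real port would typically use inline assembly or its own intrinsics.

    template<>
    struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
      VALUE_OBJ_CLASS_SPEC
    {
      template <typename T>
      void operator()(T v, volatile T* p) const {
        // Assumption for the sketch: a sequentially consistent store provides
        // the release ordering plus the trailing fence.
        __atomic_store_n(p, v, __ATOMIC_SEQ_CST);
      }
    };
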
--- a/src/hotspot/share/runtime/orderAccess.inline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/orderAccess.inline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -26,14 +26,11 @@
 #ifndef SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
 #define SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
 
-#include "runtime/atomic.hpp"
 #include "runtime/orderAccess.hpp"
 #include "utilities/macros.hpp"
 
 #include OS_CPU_HEADER_INLINE(orderAccess)
 
-#ifdef VM_HAS_GENERALIZED_ORDER_ACCESS
-
 template<> inline void ScopedFenceGeneral<X_ACQUIRE>::postfix()       { OrderAccess::acquire(); }
 template<> inline void ScopedFenceGeneral<RELEASE_X>::prefix()        { OrderAccess::release(); }
 template<> inline void ScopedFenceGeneral<RELEASE_X_FENCE>::prefix()  { OrderAccess::release(); }
@@ -43,80 +40,42 @@
 template <typename FieldType, ScopedFenceType FenceType>
 inline void OrderAccess::ordered_store(volatile FieldType* p, FieldType v) {
   ScopedFence<FenceType> f((void*)p);
-  store(p, v);
+  Atomic::store(v, p);
 }
 
 template <typename FieldType, ScopedFenceType FenceType>
 inline FieldType OrderAccess::ordered_load(const volatile FieldType* p) {
   ScopedFence<FenceType> f((void*)p);
-  return load(p);
+  return Atomic::load(p);
+}
+
+template <typename T>
+inline T OrderAccess::load_acquire(const volatile T* p) {
+  return LoadImpl<T, PlatformOrderedLoad<sizeof(T), X_ACQUIRE> >()(p);
+}
+
+inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t*   p) {
+  return load_acquire(p);
+}
+
+inline void*    OrderAccess::load_ptr_acquire(const volatile void*       p) {
+  return load_acquire(static_cast<void* const volatile *>(p));
 }
 
-inline jbyte    OrderAccess::load_acquire(const volatile jbyte*   p) { return specialized_load_acquire(p); }
-inline jshort   OrderAccess::load_acquire(const volatile jshort*  p) { return specialized_load_acquire(p); }
-inline jint     OrderAccess::load_acquire(const volatile jint*    p) { return specialized_load_acquire(p); }
-inline jlong    OrderAccess::load_acquire(const volatile jlong*   p) { return specialized_load_acquire(p); }
-inline jfloat   OrderAccess::load_acquire(const volatile jfloat*  p) { return specialized_load_acquire(p); }
-inline jdouble  OrderAccess::load_acquire(const volatile jdouble* p) { return specialized_load_acquire(p); }
-inline jubyte   OrderAccess::load_acquire(const volatile jubyte*  p) { return (jubyte) specialized_load_acquire((const volatile jbyte*)p);  }
-inline jushort  OrderAccess::load_acquire(const volatile jushort* p) { return (jushort)specialized_load_acquire((const volatile jshort*)p); }
-inline juint    OrderAccess::load_acquire(const volatile juint*   p) { return (juint)  specialized_load_acquire((const volatile jint*)p);   }
-inline julong   OrderAccess::load_acquire(const volatile julong*  p) { return (julong) specialized_load_acquire((const volatile jlong*)p);  }
-
-inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t*   p) { return (intptr_t)specialized_load_acquire(p); }
-inline void*    OrderAccess::load_ptr_acquire(const volatile void*       p) { return (void*)specialized_load_acquire((const volatile intptr_t*)p); }
+template <typename T, typename D>
+inline void OrderAccess::release_store(volatile D* p, T v) {
+  StoreImpl<T, D, PlatformOrderedStore<sizeof(D), RELEASE_X> >()(v, p);
+}
 
-inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jint*    p, jint    v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v) { specialized_release_store((volatile jbyte*) p, (jbyte) v); }
-inline void     OrderAccess::release_store(volatile jushort* p, jushort v) { specialized_release_store((volatile jshort*)p, (jshort)v); }
-inline void     OrderAccess::release_store(volatile juint*   p, juint   v) { specialized_release_store((volatile jint*)  p, (jint)  v); }
-inline void     OrderAccess::release_store(volatile julong*  p, julong  v) { specialized_release_store((volatile jlong*) p, (jlong) v); }
-
-inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { specialized_release_store(p, v); }
-inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v) { specialized_release_store((volatile intptr_t*)p, (intptr_t)v); }
+inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release_store(p, v); }
+inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v) { release_store(static_cast<void* volatile*>(p), v); }
 
-inline void     OrderAccess::release_store_fence(volatile jbyte*   p, jbyte   v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jshort*  p, jshort  v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jint*    p, jint    v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jlong*   p, jlong   v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_fence(volatile jubyte*  p, jubyte  v) { specialized_release_store_fence((volatile jbyte*) p, (jbyte) v); }
-inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { specialized_release_store_fence((volatile jshort*)p, (jshort)v); }
-inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v) { specialized_release_store_fence((volatile jint*)  p, (jint)  v); }
-inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { specialized_release_store_fence((volatile jlong*) p, (jlong) v); }
-
-inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { specialized_release_store_fence(p, v); }
-inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { specialized_release_store_fence((volatile intptr_t*)p, (intptr_t)v); }
+template <typename T, typename D>
+inline void OrderAccess::release_store_fence(volatile D* p, T v) {
+  StoreImpl<T, D, PlatformOrderedStore<sizeof(D), RELEASE_X_FENCE> >()(v, p);
+}
 
-// The following methods can be specialized using simple template specialization
-// in the platform specific files for optimization purposes. Otherwise the
-// generalized variant is used.
-template<typename T> inline T    OrderAccess::specialized_load_acquire       (const volatile T* p)       { return ordered_load<T, X_ACQUIRE>(p);    }
-template<typename T> inline void OrderAccess::specialized_release_store      (volatile T* p, T v)  { ordered_store<T, RELEASE_X>(p, v);       }
-template<typename T> inline void OrderAccess::specialized_release_store_fence(volatile T* p, T v)  { ordered_store<T, RELEASE_X_FENCE>(p, v); }
-
-// Generalized atomic volatile accesses valid in OrderAccess
-// All other types can be expressed in terms of these.
-inline void OrderAccess::store(volatile jbyte*   p, jbyte   v) { *p = v; }
-inline void OrderAccess::store(volatile jshort*  p, jshort  v) { *p = v; }
-inline void OrderAccess::store(volatile jint*    p, jint    v) { *p = v; }
-inline void OrderAccess::store(volatile jlong*   p, jlong   v) { Atomic::store(v, p); }
-inline void OrderAccess::store(volatile jdouble* p, jdouble v) { Atomic::store(jlong_cast(v), (volatile jlong*)p); }
-inline void OrderAccess::store(volatile jfloat*  p, jfloat  v) { *p = v; }
-
-inline jbyte   OrderAccess::load(const volatile jbyte*   p) { return *p; }
-inline jshort  OrderAccess::load(const volatile jshort*  p) { return *p; }
-inline jint    OrderAccess::load(const volatile jint*    p) { return *p; }
-inline jlong   OrderAccess::load(const volatile jlong*   p) { return Atomic::load(p); }
-inline jdouble OrderAccess::load(const volatile jdouble* p) { return jdouble_cast(Atomic::load((const volatile jlong*)p)); }
-inline jfloat  OrderAccess::load(const volatile jfloat*  p) { return *p; }
-
-#endif // VM_HAS_GENERALIZED_ORDER_ACCESS
+inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_fence(p, v); }
+inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { release_store_fence(static_cast<void* volatile*>(p), v); }
 
 #endif // SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
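
Usage sketch, illustration only (Foo and the statics are invented): call sites can now pass their own types to load_acquire/release_store directly instead of funnelling through the removed jint/jlong overloads.

    #include "runtime/orderAccess.inline.hpp"

    class Foo;
    static Foo* volatile _published = NULL;
    static volatile int  _ready     = 0;

    inline void publish(Foo* f) {
      OrderAccess::release_store(&_published, f);      // pointer type handled by the template
      OrderAccess::release_store_fence(&_ready, 1);
    }

    inline Foo* try_consume() {
      if (OrderAccess::load_acquire(&_ready) == 0) return NULL;
      return OrderAccess::load_acquire(&_published);
    }
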
--- a/src/hotspot/share/runtime/os.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/os.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -213,7 +213,7 @@
     // the bootstrap routine for the stub generator needs to check
     // the processor count directly and leave the bootstrap routine
     // in place until called after initialization has occurred.
-    return (_processor_count != 1) || AssumeMP;
+    return AssumeMP || (_processor_count != 1);
   }
   static julong available_memory();
   static julong physical_memory();
--- a/src/hotspot/share/runtime/thread.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/thread.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -3263,6 +3263,9 @@
   _buffer_blob = NULL;
   _compiler = NULL;
 
+  // The compiler uses the resource area for compilation; bias it to mtCompiler.
+  resource_area()->bias_to(mtCompiler);
+
 #ifndef PRODUCT
   _ideal_graph_printer = NULL;
 #endif
--- a/src/hotspot/share/runtime/threadCritical.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/threadCritical.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -47,11 +47,6 @@
 // or CHeapObj, due to initialization issues.
 
 class ThreadCritical : public StackObj {
- friend class os;
- private:
-  static void initialize();
-  static void release();
-
  public:
   ThreadCritical();
   ~ThreadCritical();
--- a/src/hotspot/share/runtime/vmStructs.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -277,7 +277,7 @@
   nonstatic_field(Klass,                       _secondary_super_cache,                        Klass*)                                \
   nonstatic_field(Klass,                       _secondary_supers,                             Array<Klass*>*)                        \
   nonstatic_field(Klass,                       _primary_supers[0],                            Klass*)                                \
-  nonstatic_field(Klass,                       _java_mirror,                                  oop)                                   \
+  nonstatic_field(Klass,                       _java_mirror,                                  OopHandle)                             \
   nonstatic_field(Klass,                       _modifier_flags,                               jint)                                  \
   nonstatic_field(Klass,                       _super,                                        Klass*)                                \
   nonstatic_field(Klass,                       _subklass,                                     Klass*)                                \
@@ -2726,8 +2726,12 @@
   /* JVMCI */                                                             \
   /****************/                                                      \
                                                                           \
-  declare_preprocessor_constant("INCLUDE_JVMCI", INCLUDE_JVMCI)
-
+  declare_preprocessor_constant("INCLUDE_JVMCI", INCLUDE_JVMCI)           \
+                                                                          \
+  /****************/                                                      \
+  /*  VMRegImpl   */                                                      \
+  /****************/                                                      \
+  declare_constant(VMRegImpl::stack_slot_size)
 
 //--------------------------------------------------------------------------------
 // VM_LONG_CONSTANTS
@@ -3009,7 +3013,8 @@
   VM_TYPES_PARNEW(GENERATE_VM_TYPE_ENTRY)
 
   VM_TYPES_G1(GENERATE_VM_TYPE_ENTRY,
-              GENERATE_TOPLEVEL_VM_TYPE_ENTRY)
+              GENERATE_TOPLEVEL_VM_TYPE_ENTRY,
+              GENERATE_INTEGER_VM_TYPE_ENTRY)
 #endif // INCLUDE_ALL_GCS
 
 #if INCLUDE_TRACE
@@ -3207,6 +3212,7 @@
   VM_TYPES_PARNEW(CHECK_VM_TYPE_ENTRY)
 
   VM_TYPES_G1(CHECK_VM_TYPE_ENTRY,
+              CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
               CHECK_SINGLE_ARG_VM_TYPE_NO_OP);
 
 #endif // INCLUDE_ALL_GCS
--- a/src/hotspot/share/services/heapDumper.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/heapDumper.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -856,6 +856,29 @@
     if (fldc.access_flags().is_static()) field_count++;
   }
 
+  // Add in resolved_references which is referenced by the cpCache
+  // The resolved_references is an array per InstanceKlass holding the
+  // strings and other oops resolved from the constant pool.
+  oop resolved_references = ik->constants()->resolved_references_or_null();
+  if (resolved_references != NULL) {
+    field_count++;
+
+    // Add in the resolved_references of the used previous versions of the class
+    // in the case of RedefineClasses
+    InstanceKlass* prev = ik->previous_versions();
+    while (prev != NULL && prev->constants()->resolved_references_or_null() != NULL) {
+      field_count++;
+      prev = prev->previous_versions();
+    }
+  }
+
+  // Also provide a pointer to the init_lock if present, so there aren't unreferenced int[0]
+  // arrays.
+  oop init_lock = ik->init_lock();
+  if (init_lock != NULL) {
+    field_count++;
+  }
+
   writer->write_u2(field_count);
 
   // pass 2 - dump the field descriptors and raw values
@@ -873,6 +896,29 @@
       dump_field_value(writer, sig->byte_at(0), addr);
     }
   }
+
+  // Add resolved_references for each class that has them
+  if (resolved_references != NULL) {
+    writer->write_symbolID(vmSymbols::resolved_references_name());  // name
+    writer->write_u1(sig2tag(vmSymbols::object_array_signature())); // type
+    writer->write_objectID(resolved_references);
+
+    // Also write any previous versions
+    InstanceKlass* prev = ik->previous_versions();
+    while (prev != NULL && prev->constants()->resolved_references_or_null() != NULL) {
+      writer->write_symbolID(vmSymbols::resolved_references_name());  // name
+      writer->write_u1(sig2tag(vmSymbols::object_array_signature())); // type
+      writer->write_objectID(prev->constants()->resolved_references());
+      prev = prev->previous_versions();
+    }
+  }
+
+  // Add init lock to the end if the class is not yet initialized
+  if (init_lock != NULL) {
+    writer->write_symbolID(vmSymbols::init_lock_name());         // name
+    writer->write_u1(sig2tag(vmSymbols::int_array_signature())); // type
+    writer->write_objectID(init_lock);
+  }
 }
 
 // dump the raw values of the instance fields of the given object
@@ -908,7 +954,7 @@
     if (!fld.access_flags().is_static()) {
       Symbol* sig = fld.signature();
 
-      writer->write_symbolID(fld.name());                   // name
+      writer->write_symbolID(fld.name());   // name
       writer->write_u1(sig2tag(sig));       // type
     }
   }
@@ -1822,6 +1868,8 @@
   // HPROF_GC_ROOT_JNI_GLOBAL
   JNIGlobalsDumper jni_dumper(writer());
   JNIHandles::oops_do(&jni_dumper);
+  Universe::oops_do(&jni_dumper);  // technically not jni roots, but global roots
+                                   // for things like preallocated throwable backtraces
   check_segment_length();
 
   // HPROF_GC_ROOT_STICKY_CLASS
--- a/src/hotspot/share/services/jmm.h	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/jmm.h	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,8 @@
   JMM_VERSION_1_2 = 0x20010200, // JDK 7
   JMM_VERSION_1_2_1 = 0x20010201, // JDK 7 GA
   JMM_VERSION_1_2_2 = 0x20010202,
-  JMM_VERSION     = 0x20010203
+  JMM_VERSION_2  = 0x20020000,  // JDK 10
+  JMM_VERSION     = 0x20020000
 };
 
 typedef struct {
@@ -315,7 +316,8 @@
   jobjectArray (JNICALL *DumpThreads)            (JNIEnv *env,
                                                   jlongArray ids,
                                                   jboolean lockedMonitors,
-                                                  jboolean lockedSynchronizers);
+                                                  jboolean lockedSynchronizers,
+                                                  jint maxDepth);
   void         (JNICALL *SetGCNotificationEnabled) (JNIEnv *env,
                                                     jobject mgr,
                                                     jboolean enabled);
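
Consumer-side sketch, illustration only (the helper name and includes are assumptions): after the version bump a caller has to request JMM_VERSION (2.0), and DumpThreads takes the maximum stack depth as its new last argument.

    #include "jni.h"
    #include "jmm.h"   // assumed to declare JmmInterface and JVM_GetManagement

    static jobjectArray dump_with_depth(JNIEnv* env, jlongArray ids) {
      JmmInterface* jmm = (JmmInterface*) JVM_GetManagement(JMM_VERSION);
      if (jmm == NULL) {
        return NULL;   // VM does not provide this interface version
      }
      // lockedMonitors = true, lockedSynchronizers = true, at most 8 frames per thread
      return jmm->DumpThreads(env, ids, JNI_TRUE, JNI_TRUE, 8);
    }
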
--- a/src/hotspot/share/services/management.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/management.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1160,7 +1160,8 @@
 //    locked_monitors - if true, dump locked object monitors
 //    locked_synchronizers - if true, dump locked JSR-166 synchronizers
 //
-JVM_ENTRY(jobjectArray, jmm_DumpThreads(JNIEnv *env, jlongArray thread_ids, jboolean locked_monitors, jboolean locked_synchronizers))
+JVM_ENTRY(jobjectArray, jmm_DumpThreads(JNIEnv *env, jlongArray thread_ids, jboolean locked_monitors,
+                                        jboolean locked_synchronizers, jint maxDepth))
   ResourceMark rm(THREAD);
 
   // make sure the AbstractOwnableSynchronizer klass is loaded before taking thread snapshots
@@ -1181,14 +1182,14 @@
     do_thread_dump(&dump_result,
                    ids_ah,
                    num_threads,
-                   -1, /* entire stack */
+                   maxDepth, /* stack depth */
                    (locked_monitors ? true : false),      /* with locked monitors */
                    (locked_synchronizers ? true : false), /* with locked synchronizers */
                    CHECK_NULL);
   } else {
     // obtain thread dump of all threads
     VM_ThreadDump op(&dump_result,
-                     -1, /* entire stack */
+                     maxDepth, /* stack depth */
                      (locked_monitors ? true : false),     /* with locked monitors */
                      (locked_synchronizers ? true : false) /* with locked synchronizers */);
     VMThread::execute(&op);
@@ -2237,7 +2238,7 @@
 
 void* Management::get_jmm_interface(int version) {
 #if INCLUDE_MANAGEMENT
-  if (version == JMM_VERSION_1_0) {
+  if (version == JMM_VERSION) {
     return (void*) &jmm_interface;
   }
 #endif // INCLUDE_MANAGEMENT
--- a/src/hotspot/share/services/memBaseline.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/memBaseline.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -144,6 +144,7 @@
 bool MemBaseline::baseline_summary() {
   MallocMemorySummary::snapshot(&_malloc_memory_snapshot);
   VirtualMemorySummary::snapshot(&_virtual_memory_snapshot);
+  MetaspaceSnapshot::snapshot(_metaspace_snapshot);
   return true;
 }
 
--- a/src/hotspot/share/services/memBaseline.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/memBaseline.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -65,6 +65,7 @@
   // Summary information
   MallocMemorySnapshot   _malloc_memory_snapshot;
   VirtualMemorySnapshot  _virtual_memory_snapshot;
+  MetaspaceSnapshot      _metaspace_snapshot;
 
   size_t               _class_count;
 
@@ -103,6 +104,10 @@
     return &_virtual_memory_snapshot;
   }
 
+  MetaspaceSnapshot* metaspace_snapshot() {
+    return &_metaspace_snapshot;
+  }
+
   MallocSiteIterator malloc_sites(SortingOrder order);
   VirtualMemorySiteIterator virtual_memory_sites(SortingOrder order);
 
--- a/src/hotspot/share/services/memReporter.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/memReporter.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -175,12 +175,44 @@
       amount_in_current_scale(_malloc_snapshot->malloc_overhead()->size()) > 0) {
       out->print_cr("%27s (tracking overhead=" SIZE_FORMAT "%s)", " ",
         amount_in_current_scale(_malloc_snapshot->malloc_overhead()->size()), scale);
+    } else if (flag == mtClass) {
+      // Metadata information
+      report_metadata(Metaspace::NonClassType);
+      if (Metaspace::using_class_space()) {
+        report_metadata(Metaspace::ClassType);
+      }
     }
-
     out->print_cr(" ");
   }
 }
 
+void MemSummaryReporter::report_metadata(Metaspace::MetadataType type) const {
+  assert(type == Metaspace::NonClassType || type == Metaspace::ClassType,
+    "Invalid metadata type");
+  const char* name = (type == Metaspace::NonClassType) ?
+    "Metadata:   " : "Class space:";
+
+  outputStream* out = output();
+  const char* scale = current_scale();
+  size_t committed   = MetaspaceAux::committed_bytes(type);
+  size_t used = MetaspaceAux::used_bytes(type);
+  size_t free = (MetaspaceAux::capacity_bytes(type) - used)
+              + MetaspaceAux::free_chunks_total_bytes(type)
+              + MetaspaceAux::free_bytes(type);
+
+  assert(committed >= used + free, "Sanity");
+  size_t waste = committed - (used + free);
+
+  out->print_cr("%27s (  %s)", " ", name);
+  out->print("%27s (    ", " ");
+  print_total(MetaspaceAux::reserved_bytes(type), committed);
+  out->print_cr(")");
+  out->print_cr("%27s (    used=" SIZE_FORMAT "%s)", " ", amount_in_current_scale(used), scale);
+  out->print_cr("%27s (    free=" SIZE_FORMAT "%s)", " ", amount_in_current_scale(free), scale);
+  out->print_cr("%27s (    waste=" SIZE_FORMAT "%s =%2.2f%%)", " ", amount_in_current_scale(waste),
+    scale, ((float)waste * 100)/committed);
+}
+
 void MemDetailReporter::report_detail() {
   // Start detail report
   outputStream* out = output();
@@ -305,9 +337,13 @@
     MEMFLAGS flag = NMTUtil::index_to_flag(index);
     // thread stack is reported as part of thread category
     if (flag == mtThreadStack) continue;
-    diff_summary_of_type(flag, _early_baseline.malloc_memory(flag),
-      _early_baseline.virtual_memory(flag), _current_baseline.malloc_memory(flag),
-      _current_baseline.virtual_memory(flag));
+    diff_summary_of_type(flag,
+      _early_baseline.malloc_memory(flag),
+      _early_baseline.virtual_memory(flag),
+      _early_baseline.metaspace_snapshot(),
+      _current_baseline.malloc_memory(flag),
+      _current_baseline.virtual_memory(flag),
+      _current_baseline.metaspace_snapshot());
   }
 }
 
@@ -367,9 +403,11 @@
 }
 
 
-void MemSummaryDiffReporter::diff_summary_of_type(MEMFLAGS flag, const MallocMemory* early_malloc,
-  const VirtualMemory* early_vm, const MallocMemory* current_malloc,
-  const VirtualMemory* current_vm) const {
+void MemSummaryDiffReporter::diff_summary_of_type(MEMFLAGS flag,
+  const MallocMemory* early_malloc, const VirtualMemory* early_vm,
+  const MetaspaceSnapshot* early_ms,
+  const MallocMemory* current_malloc, const VirtualMemory* current_vm,
+  const MetaspaceSnapshot* current_ms) const {
 
   outputStream* out = output();
   const char* scale = current_scale();
@@ -486,11 +524,77 @@
         out->print(" %+ld%s", overhead_diff, scale);
       }
       out->print_cr(")");
+    } else if (flag == mtClass) {
+      assert(current_ms != NULL && early_ms != NULL, "Sanity");
+      print_metaspace_diff(current_ms, early_ms);
     }
     out->print_cr(" ");
   }
 }
 
+void MemSummaryDiffReporter::print_metaspace_diff(const MetaspaceSnapshot* current_ms,
+                                                  const MetaspaceSnapshot* early_ms) const {
+  print_metaspace_diff(Metaspace::NonClassType, current_ms, early_ms);
+  if (Metaspace::using_class_space()) {
+    print_metaspace_diff(Metaspace::ClassType, current_ms, early_ms);
+  }
+}
+
+void MemSummaryDiffReporter::print_metaspace_diff(Metaspace::MetadataType type,
+                                                  const MetaspaceSnapshot* current_ms,
+                                                  const MetaspaceSnapshot* early_ms) const {
+  const char* name = (type == Metaspace::NonClassType) ?
+    "Metadata:   " : "Class space:";
+
+  outputStream* out = output();
+  const char* scale = current_scale();
+
+  out->print_cr("%27s (  %s)", " ", name);
+  out->print("%27s (    ", " ");
+  print_virtual_memory_diff(current_ms->reserved_in_bytes(type),
+                            current_ms->committed_in_bytes(type),
+                            early_ms->reserved_in_bytes(type),
+                            early_ms->committed_in_bytes(type));
+  out->print_cr(")");
+
+  long diff_used = diff_in_current_scale(current_ms->used_in_bytes(type),
+                                         early_ms->used_in_bytes(type));
+  long diff_free = diff_in_current_scale(current_ms->free_in_bytes(type),
+                                         early_ms->free_in_bytes(type));
+
+  size_t current_waste = current_ms->committed_in_bytes(type)
+    - (current_ms->used_in_bytes(type) + current_ms->free_in_bytes(type));
+  size_t early_waste = early_ms->committed_in_bytes(type)
+    - (early_ms->used_in_bytes(type) + early_ms->free_in_bytes(type));
+  long diff_waste = diff_in_current_scale(current_waste, early_waste);
+
+  // Diff used
+  out->print("%27s (    used=" SIZE_FORMAT "%s", " ",
+    amount_in_current_scale(current_ms->used_in_bytes(type)), scale);
+  if (diff_used != 0) {
+    out->print(" %+ld%s", diff_used, scale);
+  }
+  out->print_cr(")");
+
+  // Diff free
+  out->print("%27s (    free=" SIZE_FORMAT "%s", " ",
+    amount_in_current_scale(current_ms->free_in_bytes(type)), scale);
+  if (diff_free != 0) {
+    out->print(" %+ld%s", diff_free, scale);
+  }
+  out->print_cr(")");
+
+
+  // Diff waste
+  out->print("%27s (    waste=" SIZE_FORMAT "%s =%2.2f%%", " ",
+    amount_in_current_scale(current_waste), scale,
+    ((float)current_waste * 100) / current_ms->committed_in_bytes(type));
+  if (diff_waste != 0) {
+    out->print(" %+ld%s", diff_waste, scale);
+  }
+  out->print_cr(")");
+}
+
 void MemDetailDiffReporter::report_diff() {
   MemSummaryDiffReporter::report_diff();
   diff_malloc_sites();
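
Worked example, numbers invented, of the accounting used by report_metadata() and print_metaspace_diff() above: free space combines unused capacity, free chunks and remaining free bytes, and waste is whatever committed memory is neither used nor free.

    #include <cstddef>
    #include <cstdio>

    int main() {
      // Hypothetical values for one metadata type, in KB.
      std::size_t committed         = 67584;
      std::size_t capacity          = 61440;
      std::size_t used              = 51200;
      std::size_t free_chunks_total = 3072;
      std::size_t free_uncommitted  = 1024;

      std::size_t free_kb = (capacity - used) + free_chunks_total + free_uncommitted;  // 14336
      std::size_t waste   = committed - (used + free_kb);                              // 2048
      std::printf("used=%zuKB free=%zuKB waste=%zuKB (%.2f%% of committed)\n",
                  used, free_kb, waste, 100.0 * waste / committed);
      return 0;
    }
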
--- a/src/hotspot/share/services/memReporter.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/memReporter.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -27,6 +27,7 @@
 
 #if INCLUDE_NMT
 
+#include "memory/metaspace.hpp"
 #include "oops/instanceKlass.hpp"
 #include "services/memBaseline.hpp"
 #include "services/nmtCommon.hpp"
@@ -110,6 +111,8 @@
   // Report summary for each memory type
   void report_summary_of_type(MEMFLAGS type, MallocMemory* malloc_memory,
     VirtualMemory* virtual_memory);
+
+  void report_metadata(Metaspace::MetadataType type) const;
 };
 
 /*
@@ -170,7 +173,9 @@
   // report the comparison of each memory type
   void diff_summary_of_type(MEMFLAGS type,
     const MallocMemory* early_malloc, const VirtualMemory* early_vm,
-    const MallocMemory* current_malloc, const VirtualMemory* current_vm) const;
+    const MetaspaceSnapshot* early_ms,
+    const MallocMemory* current_malloc, const VirtualMemory* current_vm,
+    const MetaspaceSnapshot* current_ms) const;
 
  protected:
   void print_malloc_diff(size_t current_amount, size_t current_count,
@@ -179,6 +184,11 @@
     size_t early_reserved, size_t early_committed) const;
   void print_arena_diff(size_t current_amount, size_t current_count,
     size_t early_amount, size_t early_count) const;
+
+  void print_metaspace_diff(const MetaspaceSnapshot* current_ms,
+                            const MetaspaceSnapshot* early_ms) const;
+  void print_metaspace_diff(Metaspace::MetadataType type,
+    const MetaspaceSnapshot* current_ms, const MetaspaceSnapshot* early_ms) const;
 };
 
 /*
--- a/src/hotspot/share/services/threadService.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/threadService.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -562,6 +562,10 @@
     vframe* start_vf = _thread->last_java_vframe(&reg_map);
     int count = 0;
     for (vframe* f = start_vf; f; f = f->sender() ) {
+      if (maxDepth >= 0 && count == maxDepth) {
+        // Skip frames if more than maxDepth
+        break;
+      }
       if (f->is_java_frame()) {
         javaVFrame* jvf = javaVFrame::cast(f);
         add_stack_frame(jvf);
@@ -569,10 +573,6 @@
       } else {
         // Ignore non-Java frames
       }
-      if (maxDepth > 0 && count == maxDepth) {
-        // Skip frames if more than maxDepth
-        break;
-      }
     }
   }
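
Standalone sketch, names invented: hoisting the depth check to the top of the loop and relaxing it to maxDepth >= 0 is what gives maxDepth == 0 its new meaning of "record no frames"; the old trailing maxDepth > 0 check could never stop a zero-depth walk.

    #include <cstddef>
    #include <vector>

    // Count recorded Java frames, honoring max_depth; a negative max_depth means "entire stack".
    static int collect_frames(const std::vector<bool>& is_java_frame, int max_depth) {
      int count = 0;
      for (std::size_t i = 0; i < is_java_frame.size(); i++) {
        if (max_depth >= 0 && count == max_depth) break;  // checked before recording
        if (is_java_frame[i]) count++;                    // non-Java frames are skipped
      }
      return count;
    }
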
 
--- a/src/hotspot/share/services/virtualMemoryTracker.cpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/virtualMemoryTracker.cpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
  */
 #include "precompiled.hpp"
 
+#include "memory/metaspace.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadCritical.hpp"
@@ -492,3 +493,35 @@
 
   return true;
 }
+
+// Metaspace Support
+MetaspaceSnapshot::MetaspaceSnapshot() {
+  for (int index = (int)Metaspace::ClassType; index < (int)Metaspace::MetadataTypeCount; index ++) {
+    Metaspace::MetadataType type = (Metaspace::MetadataType)index;
+    assert_valid_metadata_type(type);
+    _reserved_in_bytes[type]  = 0;
+    _committed_in_bytes[type] = 0;
+    _used_in_bytes[type]      = 0;
+    _free_in_bytes[type]      = 0;
+  }
+}
+
+void MetaspaceSnapshot::snapshot(Metaspace::MetadataType type, MetaspaceSnapshot& mss) {
+  assert_valid_metadata_type(type);
+
+  mss._reserved_in_bytes[type]   = MetaspaceAux::reserved_bytes(type);
+  mss._committed_in_bytes[type]  = MetaspaceAux::committed_bytes(type);
+  mss._used_in_bytes[type]       = MetaspaceAux::used_bytes(type);
+
+  size_t free_in_bytes = (MetaspaceAux::capacity_bytes(type) - MetaspaceAux::used_bytes(type))
+                       + MetaspaceAux::free_chunks_total_bytes(type)
+                       + MetaspaceAux::free_bytes(type);
+  mss._free_in_bytes[type] = free_in_bytes;
+}
+
+void MetaspaceSnapshot::snapshot(MetaspaceSnapshot& mss) {
+  snapshot(Metaspace::NonClassType, mss);
+  if (Metaspace::using_class_space()) {
+    snapshot(Metaspace::ClassType, mss);
+  }
+}
--- a/src/hotspot/share/services/virtualMemoryTracker.hpp	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/hotspot/share/services/virtualMemoryTracker.hpp	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #if INCLUDE_NMT
 
 #include "memory/allocation.hpp"
+#include "memory/metaspace.hpp"
 #include "services/allocationSite.hpp"
 #include "services/nmtCommon.hpp"
 #include "utilities/linkedlist.hpp"
@@ -419,6 +420,31 @@
 };
 
 
+class MetaspaceSnapshot : public ResourceObj {
+private:
+  size_t  _reserved_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _committed_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _used_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _free_in_bytes[Metaspace::MetadataTypeCount];
+
+public:
+  MetaspaceSnapshot();
+  size_t reserved_in_bytes(Metaspace::MetadataType type)   const { assert_valid_metadata_type(type); return _reserved_in_bytes[type]; }
+  size_t committed_in_bytes(Metaspace::MetadataType type)  const { assert_valid_metadata_type(type); return _committed_in_bytes[type]; }
+  size_t used_in_bytes(Metaspace::MetadataType type)       const { assert_valid_metadata_type(type); return _used_in_bytes[type]; }
+  size_t free_in_bytes(Metaspace::MetadataType type)       const { assert_valid_metadata_type(type); return _free_in_bytes[type]; }
+
+  static void snapshot(MetaspaceSnapshot& s);
+
+private:
+  static void snapshot(Metaspace::MetadataType type, MetaspaceSnapshot& s);
+
+  static void assert_valid_metadata_type(Metaspace::MetadataType type) {
+    assert(type == Metaspace::ClassType || type == Metaspace::NonClassType,
+      "Invalid metadata type");
+  }
+};
+
 #endif // INCLUDE_NMT
 
 #endif // SHARE_VM_SERVICES_VIRTUAL_MEMORY_TRACKER_HPP
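
Usage sketch, illustration only, mirroring how MemBaseline::baseline_summary() fills its new _metaspace_snapshot member (the outputStream include is an assumption):

    #include "services/virtualMemoryTracker.hpp"
    #include "utilities/ostream.hpp"

    void print_metaspace_committed(outputStream* out) {
      MetaspaceSnapshot mss;
      MetaspaceSnapshot::snapshot(mss);   // take the snapshot
      out->print_cr("Metadata committed:    " SIZE_FORMAT " bytes",
                    mss.committed_in_bytes(Metaspace::NonClassType));
      if (Metaspace::using_class_space()) {
        out->print_cr("Class space committed: " SIZE_FORMAT " bytes",
                      mss.committed_in_bytes(Metaspace::ClassType));
      }
    }
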
--- a/src/java.base/share/classes/java/lang/Math.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.base/share/classes/java/lang/Math.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1094,6 +1094,7 @@
      * @return the result
      * @since 9
      */
+    @HotSpotIntrinsicCandidate
     public static long multiplyHigh(long x, long y) {
         if (x < 0 || y < 0) {
             // Use technique from section 8-2 of Henry S. Warren, Jr.,
--- a/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat	Thu Oct 05 18:29:47 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-BOOT
-@@BOOT_MODULE_NAMES@@
-PLATFORM
-@@PLATFORM_MODULE_NAMES@@
--- a/src/java.base/share/lib/security/default.policy	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.base/share/lib/security/default.policy	Tue Oct 10 16:29:04 2017 +0200
@@ -154,6 +154,10 @@
     permission java.security.AllPermission;
 };
 
+grant codeBase "jrt:/jdk.internal.vm.compiler.management" {
+    permission java.security.AllPermission;
+};
+
 grant codeBase "jrt:/jdk.jsobject" {
     permission java.security.AllPermission;
 };
--- a/src/java.management/share/classes/java/lang/management/ThreadMXBean.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/classes/java/lang/management/ThreadMXBean.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -687,52 +687,13 @@
 
     /**
      * Returns the thread info for each thread
-     * whose ID is in the input array {@code ids}, with stack trace
-     * and synchronization information.
-     *
-     * <p>
-     * This method obtains a snapshot of the thread information
-     * for each thread including:
-     * <ul>
-     *    <li>the entire stack trace,</li>
-     *    <li>the object monitors currently locked by the thread
-     *        if {@code lockedMonitors} is {@code true}, and</li>
-     *    <li>the <a href="LockInfo.html#OwnableSynchronizer">
-     *        ownable synchronizers</a> currently locked by the thread
-     *        if {@code lockedSynchronizers} is {@code true}.</li>
-     * </ul>
-     * <p>
-     * This method returns an array of the {@code ThreadInfo} objects,
-     * each is the thread information about the thread with the same index
-     * as in the {@code ids} array.
-     * If a thread of the given ID is not alive or does not exist,
-     * {@code null} will be set in the corresponding element
-     * in the returned array.  A thread is alive if
-     * it has been started and has not yet died.
-     * <p>
-     * If a thread does not lock any object monitor or {@code lockedMonitors}
-     * is {@code false}, the returned {@code ThreadInfo} object will have an
-     * empty {@code MonitorInfo} array.  Similarly, if a thread does not
-     * lock any synchronizer or {@code lockedSynchronizers} is {@code false},
-     * the returned {@code ThreadInfo} object
-     * will have an empty {@code LockInfo} array.
-     *
-     * <p>
-     * When both {@code lockedMonitors} and {@code lockedSynchronizers}
-     * parameters are {@code false}, it is equivalent to calling:
-     * <blockquote><pre>
-     *     {@link #getThreadInfo(long[], int)  getThreadInfo(ids, Integer.MAX_VALUE)}
-     * </pre></blockquote>
-     *
-     * <p>
-     * This method is designed for troubleshooting use, but not for
-     * synchronization control.  It might be an expensive operation.
-     *
-     * <p>
-     * <b>MBeanServer access</b>:<br>
-     * The mapped type of {@code ThreadInfo} is
-     * {@code CompositeData} with attributes as specified in the
-     * {@link ThreadInfo#from ThreadInfo.from} method.
+     * whose ID is in the input array {@code ids},
+     * with stack trace and synchronization information.
+     * This is equivalent to calling:
+     * <blockquote>
+     * {@link #getThreadInfo(long[], boolean, boolean, int)
+     * getThreadInfo(ids, lockedMonitors, lockedSynchronizers, Integer.MAX_VALUE)}
+     * </blockquote>
      *
      * @param  ids an array of thread IDs.
      * @param  lockedMonitors if {@code true}, retrieves all locked monitors.
@@ -763,18 +724,110 @@
      *
      * @since 1.6
      */
-    public ThreadInfo[] getThreadInfo(long[] ids, boolean lockedMonitors, boolean lockedSynchronizers);
+    public ThreadInfo[] getThreadInfo(long[] ids, boolean lockedMonitors,
+                                      boolean lockedSynchronizers);
+
+    /**
+     * Returns the thread info for each thread whose ID
+     * is in the input array {@code ids},
+     * with stack trace of the specified maximum number of elements
+     * and synchronization information.
+     * If {@code maxDepth == 0}, no stack trace of the thread
+     * will be dumped.
+     *
+     * <p>
+     * This method obtains a snapshot of the thread information
+     * for each thread including:
+     * <ul>
+     *    <li>stack trace of the specified maximum number of elements,</li>
+     *    <li>the object monitors currently locked by the thread
+     *        if {@code lockedMonitors} is {@code true}, and</li>
+     *    <li>the <a href="LockInfo.html#OwnableSynchronizer">
+     *        ownable synchronizers</a> currently locked by the thread
+     *        if {@code lockedSynchronizers} is {@code true}.</li>
+     * </ul>
+     * <p>
+     * This method returns an array of the {@code ThreadInfo} objects,
+     * each containing the thread information about the thread with the
+     * same index as in the {@code ids} array.
+     * If a thread of the given ID is not alive or does not exist,
+     * {@code null} will be set in the corresponding element
+     * in the returned array.  A thread is alive if
+     * it has been started and has not yet died.
+     * <p>
+     * If a thread does not lock any object monitor or {@code lockedMonitors}
+     * is {@code false}, the returned {@code ThreadInfo} object will have an
+     * empty {@code MonitorInfo} array.  Similarly, if a thread does not
+     * lock any synchronizer or {@code lockedSynchronizers} is {@code false},
+     * the returned {@code ThreadInfo} object
+     * will have an empty {@code LockInfo} array.
+     *
+     * <p>
+     * When both {@code lockedMonitors} and {@code lockedSynchronizers}
+     * parameters are {@code false}, it is equivalent to calling:
+     * <blockquote><pre>
+     *     {@link #getThreadInfo(long[], int)  getThreadInfo(ids, maxDepth)}
+     * </pre></blockquote>
+     *
+     * <p>
+     * This method is designed for troubleshooting use, but not for
+     * synchronization control.  It might be an expensive operation.
+     *
+     * <p>
+     * <b>MBeanServer access</b>:<br>
+     * The mapped type of {@code ThreadInfo} is
+     * {@code CompositeData} with attributes as specified in the
+     * {@link ThreadInfo#from ThreadInfo.from} method.
+     *
+     * @implSpec The default implementation throws
+     * {@code UnsupportedOperationException}.
+     *
+     * @param  ids an array of thread IDs.
+     * @param  lockedMonitors if {@code true}, retrieves all locked monitors.
+     * @param  lockedSynchronizers if {@code true}, retrieves all locked
+     *             ownable synchronizers.
+     * @param  maxDepth indicates the maximum number of
+     * {@link StackTraceElement} to be retrieved from the stack trace.
+     *
+     * @return an array of the {@link ThreadInfo} objects, each containing
+     * information about a thread whose ID is in the corresponding
+     * element of the input array of IDs.
+     *
+     * @throws IllegalArgumentException if {@code maxDepth} is negative.
+     * @throws java.lang.SecurityException if a security manager
+     *         exists and the caller does not have
+     *         ManagementPermission("monitor").
+     * @throws java.lang.UnsupportedOperationException
+     *         <ul>
+     *           <li>if {@code lockedMonitors} is {@code true} but
+     *               the Java virtual machine does not support monitoring
+     *               of {@linkplain #isObjectMonitorUsageSupported
+     *               object monitor usage}; or</li>
+     *           <li>if {@code lockedSynchronizers} is {@code true} but
+     *               the Java virtual machine does not support monitoring
+     *               of {@linkplain #isSynchronizerUsageSupported
+     *               ownable synchronizer usage}.</li>
+     *         </ul>
+     *
+     * @see #isObjectMonitorUsageSupported
+     * @see #isSynchronizerUsageSupported
+     *
+     * @since 10
+     */
+
+    public default ThreadInfo[] getThreadInfo(long[] ids, boolean lockedMonitors,
+                                              boolean lockedSynchronizers, int maxDepth) {
+        throw new UnsupportedOperationException();
+    }
 
     /**
      * Returns the thread info for all live threads with stack trace
      * and synchronization information.
-     * Some threads included in the returned array
-     * may have been terminated when this method returns.
-     *
-     * <p>
-     * This method returns an array of {@link ThreadInfo} objects
-     * as specified in the {@link #getThreadInfo(long[], boolean, boolean)}
-     * method.
+     * This is equivalent to calling:
+     * <blockquote>
+     * {@link #dumpAllThreads(boolean, boolean, int)
+     * dumpAllThreads(lockedMonitors, lockedSynchronizers, Integer.MAX_VALUE)}
+     * </blockquote>
      *
      * @param  lockedMonitors if {@code true}, dump all locked monitors.
      * @param  lockedSynchronizers if {@code true}, dump all locked
@@ -803,4 +856,56 @@
      * @since 1.6
      */
     public ThreadInfo[] dumpAllThreads(boolean lockedMonitors, boolean lockedSynchronizers);
+
+
+    /**
+     * Returns the thread info for all live threads
+     * with stack trace of the specified maximum number of elements
+     * and synchronization information.
+     * If {@code maxDepth == 0}, no stack trace of the thread
+     * will be dumped.
+     * Some threads included in the returned array
+     * may have been terminated when this method returns.
+     *
+     * <p>
+     * This method returns an array of {@link ThreadInfo} objects
+     * as specified in the {@link #getThreadInfo(long[], boolean, boolean, int)}
+     * method.
+     *
+     * @implSpec The default implementation throws
+     * {@code UnsupportedOperationException}.
+     *
+     * @param  lockedMonitors if {@code true}, dump all locked monitors.
+     * @param  lockedSynchronizers if {@code true}, dump all locked
+     *             ownable synchronizers.
+     * @param  maxDepth indicates the maximum number of
+     * {@link StackTraceElement} to be retrieved from the stack trace.
+     *
+     * @return an array of {@link ThreadInfo} for all live threads.
+     *
+     * @throws IllegalArgumentException if {@code maxDepth} is negative.
+     * @throws java.lang.SecurityException if a security manager
+     *         exists and the caller does not have
+     *         ManagementPermission("monitor").
+     * @throws java.lang.UnsupportedOperationException
+     *         <ul>
+     *           <li>if {@code lockedMonitors} is {@code true} but
+     *               the Java virtual machine does not support monitoring
+     *               of {@linkplain #isObjectMonitorUsageSupported
+     *               object monitor usage}; or</li>
+     *           <li>if {@code lockedSynchronizers} is {@code true} but
+     *               the Java virtual machine does not support monitoring
+     *               of {@linkplain #isSynchronizerUsageSupported
+     *               ownable synchronizer usage}.</li>
+     *         </ul>
+     *
+     * @see #isObjectMonitorUsageSupported
+     * @see #isSynchronizerUsageSupported
+     *
+     * @since 10
+     */
+    public default ThreadInfo[] dumpAllThreads(boolean lockedMonitors,
+                                               boolean lockedSynchronizers, int maxDepth) {
+        throw new UnsupportedOperationException();
+    }
 }
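
The two hunks above add the maxDepth overloads to ThreadMXBean as default methods. A minimal caller-side sketch, assuming a JDK 10+ runtime whose platform ThreadMXBean implements them (the class name and printed strings are illustrative):

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;

public class ShallowThreadDump {
    public static void main(String[] args) {
        ThreadMXBean threads = ManagementFactory.getThreadMXBean();

        // Dump all live threads, keeping at most 8 stack frames per thread.
        // An implementation without these JDK 10 default methods would throw
        // UnsupportedOperationException here instead.
        ThreadInfo[] infos = threads.dumpAllThreads(false, false, 8);
        for (ThreadInfo info : infos) {
            if (info != null) {
                System.out.println(info.getThreadName() + ": "
                        + info.getStackTrace().length + " frame(s) kept");
            }
        }

        // maxDepth == 0 requests thread state only, with no stack frames.
        long[] self = { Thread.currentThread().getId() };
        ThreadInfo[] noStack = threads.getThreadInfo(self, false, false, 0);
        System.out.println("frames with maxDepth=0: "
                + noStack[0].getStackTrace().length);
    }
}
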
--- a/src/java.management/share/classes/module-info.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/classes/module-info.java	Tue Oct 10 16:29:04 2017 +0200
@@ -64,7 +64,8 @@
     exports sun.management.counter.perf to
         jdk.management.agent;
     exports sun.management.spi to
-        jdk.management;
+        jdk.management,
+        jdk.internal.vm.compiler.management;
 
     uses javax.management.remote.JMXConnectorProvider;
     uses javax.management.remote.JMXConnectorServerProvider;
--- a/src/java.management/share/classes/sun/management/ThreadImpl.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/classes/sun/management/ThreadImpl.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -463,20 +463,43 @@
     public ThreadInfo[] getThreadInfo(long[] ids,
                                       boolean lockedMonitors,
                                       boolean lockedSynchronizers) {
+        return dumpThreads0(ids, lockedMonitors, lockedSynchronizers,
+                            Integer.MAX_VALUE);
+    }
+
+    public ThreadInfo[] getThreadInfo(long[] ids,
+                                      boolean lockedMonitors,
+                                      boolean lockedSynchronizers,
+                                      int maxDepth) {
+        if (maxDepth < 0) {
+            throw new IllegalArgumentException(
+                    "Invalid maxDepth parameter: " + maxDepth);
+        }
         verifyThreadIds(ids);
         // ids has been verified to be non-null
         // an empty array of ids should return an empty array of ThreadInfos
         if (ids.length == 0) return new ThreadInfo[0];
 
         verifyDumpThreads(lockedMonitors, lockedSynchronizers);
-        return dumpThreads0(ids, lockedMonitors, lockedSynchronizers);
+        return dumpThreads0(ids, lockedMonitors, lockedSynchronizers, maxDepth);
     }
 
     @Override
     public ThreadInfo[] dumpAllThreads(boolean lockedMonitors,
                                        boolean lockedSynchronizers) {
+        return dumpAllThreads(lockedMonitors, lockedSynchronizers,
+                              Integer.MAX_VALUE);
+    }
+
+    public ThreadInfo[] dumpAllThreads(boolean lockedMonitors,
+                                       boolean lockedSynchronizers,
+                                       int maxDepth) {
+        if (maxDepth < 0) {
+            throw new IllegalArgumentException(
+                    "Invalid maxDepth parameter: " + maxDepth);
+        }
         verifyDumpThreads(lockedMonitors, lockedSynchronizers);
-        return dumpThreads0(null, lockedMonitors, lockedSynchronizers);
+        return dumpThreads0(null, lockedMonitors, lockedSynchronizers, maxDepth);
     }
 
     // VM support where maxDepth == -1 to request entire stack dump
@@ -497,7 +520,8 @@
     private static native void resetPeakThreadCount0();
     private static native ThreadInfo[] dumpThreads0(long[] ids,
                                                     boolean lockedMonitors,
-                                                    boolean lockedSynchronizers);
+                                                    boolean lockedSynchronizers,
+                                                    int maxDepth);
 
     // tid == 0 to reset contention times for all threads
     private static native void resetContentionTimes0(long tid);
--- a/src/java.management/share/native/include/jmm.h	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/native/include/jmm.h	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,8 @@
   JMM_VERSION_1_2 = 0x20010200, // JDK 7
   JMM_VERSION_1_2_1 = 0x20010201, // JDK 7 GA
   JMM_VERSION_1_2_2 = 0x20010202,
-  JMM_VERSION     = 0x20010203
+  JMM_VERSION_2  = 0x20020000,  // JDK 10
+  JMM_VERSION     = 0x20020000
 };
 
 typedef struct {
@@ -315,7 +316,8 @@
   jobjectArray (JNICALL *DumpThreads)            (JNIEnv *env,
                                                   jlongArray ids,
                                                   jboolean lockedMonitors,
-                                                  jboolean lockedSynchronizers);
+                                                  jboolean lockedSynchronizers,
+                                                  jint maxDepth);
   void         (JNICALL *SetGCNotificationEnabled) (JNIEnv *env,
                                                     jobject mgr,
                                                     jboolean enabled);
--- a/src/java.management/share/native/libmanagement/ThreadImpl.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/native/libmanagement/ThreadImpl.c	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -135,7 +135,9 @@
 
 JNIEXPORT jobjectArray JNICALL
 Java_sun_management_ThreadImpl_dumpThreads0
-  (JNIEnv *env, jclass cls, jlongArray ids, jboolean lockedMonitors, jboolean lockedSynchronizers)
+  (JNIEnv *env, jclass cls, jlongArray ids, jboolean lockedMonitors,
+  jboolean lockedSynchronizers, jint maxDepth)
 {
-    return jmm_interface->DumpThreads(env, ids, lockedMonitors, lockedSynchronizers);
+    return jmm_interface->DumpThreads(env, ids, lockedMonitors,
+                                      lockedSynchronizers, maxDepth);
 }
--- a/src/java.management/share/native/libmanagement/management.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/java.management/share/native/libmanagement/management.c	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,7 @@
         return JNI_ERR;
     }
 
-    jmm_interface = (JmmInterface*) JVM_GetManagement(JMM_VERSION_1_0);
+    jmm_interface = (JmmInterface*) JVM_GetManagement(JMM_VERSION);
     if (jmm_interface == NULL) {
         JNU_ThrowInternalError(env, "Unsupported Management version");
         return JNI_ERR;
--- a/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,10 @@
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.Files;
 
 /*
  * Linux implementation of HotSpotVirtualMachine
@@ -63,12 +67,15 @@
             throw new AttachNotSupportedException("Invalid process identifier");
         }
 
+        // Try to resolve to the innermost pid namespace
+        int ns_pid = getNamespacePid(pid);
+
         // Find the socket file. If not found then we attempt to start the
         // attach mechanism in the target VM by sending it a QUIT signal.
         // Then we attempt to find the socket file again.
-        path = findSocketFile(pid);
+        path = findSocketFile(pid, ns_pid);
         if (path == null) {
-            File f = createAttachFile(pid);
+            File f = createAttachFile(pid, ns_pid);
             try {
                 sendQuitTo(pid);
 
@@ -83,7 +90,7 @@
                     try {
                         Thread.sleep(delay);
                     } catch (InterruptedException x) { }
-                    path = findSocketFile(pid);
+                    path = findSocketFile(pid, ns_pid);
 
                     time_spend += delay;
                     if (time_spend > timeout/2 && path == null) {
@@ -262,8 +269,12 @@
     }
 
     // Return the socket file for the given process.
-    private String findSocketFile(int pid) {
-        File f = new File(tmpdir, ".java_pid" + pid);
+    private String findSocketFile(int pid, int ns_pid) {
+        // A process may not exist in the same mount namespace as the caller.
+        // Instead, attach relative to the target root filesystem as exposed by
+        // procfs regardless of namespaces.
+        String root = "/proc/" + pid + "/root/" + tmpdir;
+        File f = new File(root, ".java_pid" + ns_pid);
         if (!f.exists()) {
             return null;
         }
@@ -274,14 +285,23 @@
     // if not already started. The client creates a .attach_pid<pid> file in the
     // target VM's working directory (or temp directory), and the SIGQUIT handler
     // checks for the file.
-    private File createAttachFile(int pid) throws IOException {
-        String fn = ".attach_pid" + pid;
+    private File createAttachFile(int pid, int ns_pid) throws IOException {
+        String fn = ".attach_pid" + ns_pid;
         String path = "/proc/" + pid + "/cwd/" + fn;
         File f = new File(path);
         try {
             f.createNewFile();
         } catch (IOException x) {
-            f = new File(tmpdir, fn);
+            String root;
+            if (pid != ns_pid) {
+                // A process may not exist in the same mount namespace as the caller.
+                // Instead, attach relative to the target root filesystem as exposed by
+                // procfs regardless of namespaces.
+                root = "/proc/" + pid + "/root/" + tmpdir;
+            } else {
+                root = tmpdir;
+            }
+            f = new File(root, fn);
             f.createNewFile();
         }
         return f;
@@ -307,6 +327,40 @@
     }
 
 
+    // Return the innermost namespaced PID if there is one,
+    // otherwise return the original PID.
+    private int getNamespacePid(int pid) throws AttachNotSupportedException, IOException {
+        // Assuming a real procfs sits beneath, reading this doesn't block
+        // nor will it consume a lot of memory.
+        String statusFile = "/proc/" + pid + "/status";
+        File f = new File(statusFile);
+        if (!f.exists()) {
+            return pid; // Likely a bad pid, but this is properly handled later.
+        }
+
+        Path statusPath = Paths.get(statusFile);
+
+        try {
+            for (String line : Files.readAllLines(statusPath, StandardCharsets.UTF_8)) {
+                String[] parts = line.split(":");
+                if (parts.length == 2 && parts[0].trim().equals("NSpid")) {
+                    parts = parts[1].trim().split("\\s+");
+                    // The last entry represents the PID the JVM "thinks" it is.
+                    // Even in non-namespaced pids these entries should be
+                    // valid. You could refer to it as the innermost pid.
+                    int ns_pid = Integer.parseInt(parts[parts.length - 1]);
+                    return ns_pid;
+                }
+            }
+            // Old kernels (e.g. 3.10) may not have the NSpid field.
+            // Fall back to the original pid in the event we cannot deduce it.
+            return pid;
+        } catch (NumberFormatException | IOException x) {
+            throw new AttachNotSupportedException("Unable to parse namespace");
+        }
+    }
+
+
     //-- native methods
 
     static native void sendQuitToChildrenOf(int pid) throws IOException;
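
The getNamespacePid() helper above keys off the NSpid line of /proc/<pid>/status. A self-contained sketch of the same parsing against an inline sample of that file's content; the class name, helper name and sample pids are illustrative:

import java.util.List;

public class NsPidDemo {
    // Mirrors the parsing in getNamespacePid(): the last NSpid entry is the
    // pid the target JVM sees inside its (possibly nested) pid namespace.
    static int namespacePid(List<String> statusLines, int fallbackPid) {
        for (String line : statusLines) {
            String[] parts = line.split(":");
            if (parts.length == 2 && parts[0].trim().equals("NSpid")) {
                String[] pids = parts[1].trim().split("\\s+");
                return Integer.parseInt(pids[pids.length - 1]);
            }
        }
        return fallbackPid; // old kernels without an NSpid field
    }

    public static void main(String[] args) {
        // Host pid 31714, seen as pid 1 inside the container's pid namespace.
        List<String> sample = List.of(
                "Name:\tjava",
                "Pid:\t31714",
                "NSpid:\t31714\t1");
        System.out.println(namespacePid(sample, 31714)); // prints 1
    }
}
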
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.gc.parallel.*;
 import sun.jvm.hotspot.gc.shared.*;
+import sun.jvm.hotspot.gc.g1.*;
 import sun.jvm.hotspot.interpreter.*;
 import sun.jvm.hotspot.memory.*;
 import sun.jvm.hotspot.oops.*;
@@ -1078,6 +1079,26 @@
                             }
                           }
 
+                        } else if (collHeap instanceof G1CollectedHeap) {
+                          G1CollectedHeap heap = (G1CollectedHeap)collHeap;
+                          HeapRegion region = heap.hrm().getByAddress(handle);
+
+                          if (region.isFree()) {
+                            anno = "Free ";
+                            bad = false;
+                          } else if (region.isYoung()) {
+                            anno = "Young ";
+                            bad = false;
+                          } else if (region.isHumongous()) {
+                            anno = "Humongous ";
+                            bad = false;
+                          } else if (region.isPinned()) {
+                            anno = "Pinned ";
+                            bad = false;
+                          } else if (region.isOld()) {
+                            anno = "Old ";
+                            bad = false;
+                          }
                         } else if (collHeap instanceof ParallelScavengeHeap) {
                           ParallelScavengeHeap heap = (ParallelScavengeHeap) collHeap;
                           if (heap.youngGen().isIn(handle)) {
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/NMethod.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/NMethod.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -71,7 +71,7 @@
       stack.  An not_entrant method can be removed when there is no
       more activations, i.e., when the _stack_traversal_mark is less than
       current sweep traversal index. */
-  private static JLongField stackTraversalMarkField;
+  private static CIntegerField stackTraversalMarkField;
 
   private static CIntegerField compLevelField;
 
@@ -105,7 +105,7 @@
     verifiedEntryPointField     = type.getAddressField("_verified_entry_point");
     osrEntryPointField          = type.getAddressField("_osr_entry_point");
     lockCountField              = type.getJIntField("_lock_count");
-    stackTraversalMarkField     = type.getJLongField("_stack_traversal_mark");
+    stackTraversalMarkField     = type.getCIntegerField("_stack_traversal_mark");
     compLevelField              = type.getCIntegerField("_comp_level");
     pcDescSize = db.lookupType("PcDesc").getSize();
   }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/VMRegImpl.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/VMRegImpl.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,6 +37,7 @@
   private static int stack0Val;
   private static Address stack0Addr;
   private static AddressField regNameField;
+  private static int stackSlotSize;
 
   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -53,6 +54,7 @@
     stack0Val = (int) stack0Addr.hashCode();
     stack0 = new VMReg(stack0Val);
     regNameField = type.getAddressField("regName[0]");
+    stackSlotSize = db.lookupIntConstant("VMRegImpl::stack_slot_size");
   }
 
   public static VMReg getStack0() {
@@ -67,4 +69,8 @@
     long addrSize = VM.getVM().getAddressSize();
     return CStringUtilities.getString(regName.getAddressAt(index * addrSize));
   }
+
+  public static int getStackSlotSize() {
+    return stackSlotSize;
+  }
 }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/G1CollectedHeap.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/G1CollectedHeap.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -87,7 +87,7 @@
         return hrm().length();
     }
 
-    private HeapRegionManager hrm() {
+    public HeapRegionManager hrm() {
         Address hrmAddr = addr.addOffsetTo(hrmFieldOffset);
         return (HeapRegionManager) VMObjectFactory.newObject(HeapRegionManager.class,
                                                          hrmAddr);
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/G1HeapRegionTable.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/G1HeapRegionTable.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 import java.util.Observer;
 
 import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.debugger.OopHandle;
 import sun.jvm.hotspot.runtime.VM;
 import sun.jvm.hotspot.runtime.VMObject;
 import sun.jvm.hotspot.runtime.VMObjectFactory;
@@ -36,6 +37,7 @@
 import sun.jvm.hotspot.types.CIntegerField;
 import sun.jvm.hotspot.types.Type;
 import sun.jvm.hotspot.types.TypeDataBase;
+import sun.jvm.hotspot.utilities.Assert;
 
 // Mirror class for G1HeapRegionTable. It's essentially an index -> HeapRegion map.
 
@@ -132,4 +134,13 @@
     public G1HeapRegionTable(Address addr) {
         super(addr);
     }
+
+    public HeapRegion getByAddress(Address addr) {
+        if (Assert.ASSERTS_ENABLED) {
+            Assert.that(addr instanceof OopHandle, "addr should be OopHandle");
+        }
+
+        long biasedIndex = addr.asLongValue() >>> shiftBy();
+        return new HeapRegion(addr.addOffsetToAsOopHandle(biasedIndex * HeapRegion.getPointerSize()));
+    }
 }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/HeapRegion.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/HeapRegion.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,13 +29,16 @@
 import java.util.Observable;
 import java.util.Observer;
 import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.debugger.OopHandle;
 import sun.jvm.hotspot.gc.shared.CompactibleSpace;
 import sun.jvm.hotspot.memory.MemRegion;
 import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
 import sun.jvm.hotspot.types.AddressField;
 import sun.jvm.hotspot.types.CIntegerField;
 import sun.jvm.hotspot.types.Type;
 import sun.jvm.hotspot.types.TypeDataBase;
+import sun.jvm.hotspot.utilities.Assert;
 
 // Mirror class for HeapRegion. Currently we don't actually include
 // any of its fields but only iterate over it.
@@ -44,6 +47,10 @@
     // static int GrainBytes;
     static private CIntegerField grainBytesField;
     static private AddressField topField;
+    private static long typeFieldOffset;
+    private static long pointerSize;
+
+    private HeapRegionType type;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -58,7 +65,9 @@
 
         grainBytesField = type.getCIntegerField("GrainBytes");
         topField = type.getAddressField("_top");
+        typeFieldOffset = type.getField("_type").getOffset();
 
+        pointerSize = db.lookupType("HeapRegion*").getSize();
     }
 
     static public long grainBytes() {
@@ -67,6 +76,13 @@
 
     public HeapRegion(Address addr) {
         super(addr);
+
+        if (Assert.ASSERTS_ENABLED) {
+            Assert.that(addr instanceof OopHandle, "addr should be OopHandle");
+        }
+
+        Address typeAddr = addr.addOffsetToAsOopHandle(typeFieldOffset);
+        type = (HeapRegionType)VMObjectFactory.newObject(HeapRegionType.class, typeAddr);
     }
 
     public Address top() {
@@ -89,4 +105,28 @@
     public long free() {
         return end().minus(top());
     }
+
+    public boolean isFree() {
+        return type.isFree();
+    }
+
+    public boolean isYoung() {
+        return type.isYoung();
+    }
+
+    public boolean isHumongous() {
+        return type.isHumongous();
+    }
+
+    public boolean isPinned() {
+        return type.isPinned();
+    }
+
+    public boolean isOld() {
+        return type.isOld();
+    }
+
+    public static long getPointerSize() {
+        return pointerSize;
+    }
 }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/HeapRegionManager.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/HeapRegionManager.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,4 +85,8 @@
     public HeapRegionManager(Address addr) {
         super(addr);
     }
+
+    public HeapRegion getByAddress(Address addr) {
+      return regions().getByAddress(addr);
+    }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/gc/g1/HeapRegionType.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc.g1;
+
+import java.util.Observable;
+import java.util.Observer;
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for HeapRegionType. Reads the region's _tag field so the
+// SA can classify a region as free, young, humongous, pinned or old.
+
+public class HeapRegionType extends VMObject {
+
+    private static int freeTag;
+    private static int youngMask;
+    private static int humongousMask;
+    private static int pinnedMask;
+    private static int oldMask;
+    private static CIntegerField tagField;
+    private int tag;
+
+    static {
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+        });
+    }
+
+    private static synchronized void initialize(TypeDataBase db) {
+        Type type = db.lookupType("HeapRegionType");
+
+        tagField = type.getCIntegerField("_tag");
+
+        freeTag = db.lookupIntConstant("HeapRegionType::FreeTag");
+        youngMask = db.lookupIntConstant("HeapRegionType::YoungMask");
+        humongousMask = db.lookupIntConstant("HeapRegionType::HumongousMask");
+        pinnedMask = db.lookupIntConstant("HeapRegionType::PinnedMask");
+        oldMask = db.lookupIntConstant("HeapRegionType::OldMask");
+    }
+
+    public boolean isFree() {
+        return tagField.getValue(addr) == freeTag;
+    }
+
+    public boolean isYoung() {
+        return (tagField.getValue(addr) & youngMask) != 0;
+    }
+
+    public boolean isHumongous() {
+        return (tagField.getValue(addr) & humongousMask) != 0;
+    }
+
+    public boolean isPinned() {
+        return (tagField.getValue(addr) & pinnedMask) != 0;
+    }
+
+    public boolean isOld() {
+        return (tagField.getValue(addr) & oldMask) != 0;
+    }
+
+    public HeapRegionType(Address addr) {
+        super(addr);
+    }
+}
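
HeapRegionType.java distinguishes an exact FreeTag comparison from mask tests for the other region kinds. A tiny self-contained illustration of that tag/mask scheme with made-up constant values; in the SA the real constants are read from the target VM via lookupIntConstant:

public class RegionTagDemo {
    // Illustrative values only; the SA reads the real constants from the VM.
    static final int FREE_TAG       = 0;
    static final int YOUNG_MASK     = 0b0010;
    static final int HUMONGOUS_MASK = 0b0100;
    static final int PINNED_MASK    = 0b1000;
    static final int OLD_MASK       = 0b1_0000;

    static String classify(int tag) {
        if (tag == FREE_TAG)               return "Free";
        if ((tag & YOUNG_MASK) != 0)       return "Young";
        if ((tag & HUMONGOUS_MASK) != 0)   return "Humongous";
        if ((tag & PINNED_MASK) != 0)      return "Pinned";
        if ((tag & OLD_MASK) != 0)         return "Old";
        return "Unknown";
    }

    public static void main(String[] args) {
        System.out.println(classify(0));         // Free
        System.out.println(classify(0b0010));    // Young
        System.out.println(classify(0b1_0000));  // Old
    }
}
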
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Tue Oct 10 16:29:04 2017 +0200
@@ -269,13 +269,12 @@
 
   public static int  decodeInvokedynamicIndex(int i) { Assert.that(isInvokedynamicIndex(i),  ""); return ~i; }
 
-  // The invokedynamic points at the object index.  The object map points at
-  // the cpCache index and the cpCache entry points at the original constant
-  // pool index.
+  // The invokedynamic points at a CP cache entry.  This entry points back
+  // at the original CP entry (CONSTANT_InvokeDynamic) and also (via f2) at an entry
+  // in the resolved_references array (which provides the appendix argument).
   public int invokedynamicCPCacheIndex(int index) {
     Assert.that(isInvokedynamicIndex(index), "should be a invokedynamic index");
-    int rawIndex = decodeInvokedynamicIndex(index);
-    return referenceMap().at(rawIndex);
+    return decodeInvokedynamicIndex(index);
   }
 
   ConstantPoolCacheEntry invokedynamicCPCacheEntryAt(int index) {
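
For reference, the invokedynamic operand encoding that invokedynamicCPCacheIndex() now passes through unchanged is a bitwise complement, so encoded indices are negative and decoding is its own inverse. A short illustration (helper names are illustrative):

public class IndyIndexDemo {
    // Mirrors decodeInvokedynamicIndex(i) { return ~i; } from the hunk above.
    static int decode(int i) { return ~i; }
    static int encode(int cpCacheIndex) { return ~cpCacheIndex; }

    public static void main(String[] args) {
        int cpCacheIndex = 7;
        int encoded = encode(cpCacheIndex);
        System.out.println(encoded);          // -8: encoded indices are negative
        System.out.println(decode(encoded));  // 7: decode(encode(x)) == x
    }
}
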
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Klass.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Klass.java	Tue Oct 10 16:29:04 2017 +0200
@@ -51,7 +51,7 @@
 
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type    = db.lookupType("Klass");
-    javaMirror   = new OopField(type.getOopField("_java_mirror"), 0);
+    javaMirror   = type.getAddressField("_java_mirror");
     superField   = new MetadataField(type.getAddressField("_super"), 0);
     layoutHelper = new IntField(type.getJIntField("_layout_helper"), 0);
     name         = type.getAddressField("_name");
@@ -88,7 +88,7 @@
   public boolean isKlass()             { return true; }
 
   // Fields
-  private static OopField  javaMirror;
+  private static AddressField   javaMirror;
   private static MetadataField  superField;
   private static IntField layoutHelper;
   private static AddressField  name;
@@ -109,7 +109,15 @@
   }
 
   // Accessors for declared fields
-  public Instance getJavaMirror()       { return (Instance) javaMirror.getValue(this);   }
+  public Instance getJavaMirror() {
+    Address handle = javaMirror.getValue(getAddress());
+    if (handle != null) {
+      // Load through the handle
+      OopHandle refs = handle.getOopHandleAt(0);
+      return (Instance)VM.getVM().getObjectHeap().newOop(refs);
+    }
+    return null;
+  }
   public Klass    getSuper()            { return (Klass)    superField.getValue(this);   }
   public Klass    getJavaSuper()        { return null;  }
   public int      getLayoutHelper()     { return (int)           layoutHelper.getValue(this); }
@@ -185,7 +193,7 @@
   }
 
   public void iterateFields(MetadataVisitor visitor) {
-      visitor.doOop(javaMirror, true);
+      // visitor.doOop(javaMirror, true);
     visitor.doMetadata(superField, true);
       visitor.doInt(layoutHelper, true);
       // visitor.doOop(name, true);
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicType.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicType.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,20 +28,23 @@
     VM. */
 
 public class BasicType {
-  public static final int tBoolean  = 4;
-  public static final int tChar     = 5;
-  public static final int tFloat    = 6;
-  public static final int tDouble   = 7;
-  public static final int tByte     = 8;
-  public static final int tShort    = 9;
-  public static final int tInt      = 10;
-  public static final int tLong     = 11;
-  public static final int tObject   = 12;
-  public static final int tArray    = 13;
-  public static final int tVoid     = 14;
-  public static final int tAddress  = 15;
-  public static final int tConflict = 16;
-  public static final int tIllegal  = 99;
+  public static final int tBoolean     = 4;
+  public static final int tChar        = 5;
+  public static final int tFloat       = 6;
+  public static final int tDouble      = 7;
+  public static final int tByte        = 8;
+  public static final int tShort       = 9;
+  public static final int tInt         = 10;
+  public static final int tLong        = 11;
+  public static final int tObject      = 12;
+  public static final int tArray       = 13;
+  public static final int tVoid        = 14;
+  public static final int tAddress     = 15;
+  public static final int tNarrowOop   = 16;
+  public static final int tMetadata    = 17;
+  public static final int tNarrowKlass = 18;
+  public static final int tConflict    = 19;
+  public static final int tIllegal     = 99;
 
   public static final BasicType T_BOOLEAN = new BasicType(tBoolean);
   public static final BasicType T_CHAR = new BasicType(tChar);
@@ -55,6 +58,9 @@
   public static final BasicType T_ARRAY = new BasicType(tArray);
   public static final BasicType T_VOID = new BasicType(tVoid);
   public static final BasicType T_ADDRESS = new BasicType(tAddress);
+  public static final BasicType T_NARROWOOP = new BasicType(tNarrowOop);
+  public static final BasicType T_METADATA = new BasicType(tMetadata);
+  public static final BasicType T_NARROWKLASS = new BasicType(tNarrowKlass);
   public static final BasicType T_CONFLICT = new BasicType(tConflict);
   public static final BasicType T_ILLEGAL = new BasicType(tIllegal);
 
@@ -106,6 +112,18 @@
     return tAddress;
   }
 
+  public static int getTNarrowOop() {
+    return tNarrowOop;
+  }
+
+  public static int getTMetadata() {
+    return tMetadata;
+  }
+
+  public static int getTNarrowKlass() {
+    return tNarrowKlass;
+  }
+
   /** For stack value type with conflicting contents */
   public static int getTConflict() {
     return tConflict;
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Frame.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -430,7 +430,7 @@
       // If it is passed in a register, it got spilled in the stub frame.
       return regMap.getLocation(reg);
     } else {
-      long spOffset = VM.getVM().getAddressSize() * reg.minus(stack0);
+      long spOffset = reg.reg2Stack() * VM.getVM().getVMRegImplInfo().getStackSlotSize();
       return getUnextendedSP().addOffsetTo(spOffset);
     }
   }
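
The Frame.java fix above matters because a VMReg stack slot is not an address-sized unit, so scaling by the address size overstated the offset of spilled values on 64-bit VMs. A small worked example, assuming an address size of 8 and a VMRegImpl::stack_slot_size of 4 (the SA now reads the real value from the target VM):

public class SlotOffsetDemo {
    public static void main(String[] args) {
        int slot = 6;            // reg2Stack(): number of slots above stack0
        int addressSize = 8;     // 64-bit VM
        int stackSlotSize = 4;   // assumed VMRegImpl::stack_slot_size

        long oldOffset = (long) addressSize * slot;    // 48: old SA formula
        long newOffset = (long) slot * stackSlotSize;  // 24: new SA formula

        System.out.println("old=" + oldOffset + " bytes, new=" + newOffset + " bytes");
    }
}
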
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/StackValueCollection.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/StackValueCollection.java	Tue Oct 10 16:29:04 2017 +0200
@@ -27,6 +27,7 @@
 import java.util.*;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.types.*;
 
 public class StackValueCollection {
   private List list;
@@ -48,7 +49,15 @@
   public int       intAt(int slot)       { return (int) get(slot).getInteger(); }
   public long      longAt(int slot)      { return VM.getVM().buildLongFromIntsPD((int) get(slot).getInteger(),
                                                                                  (int) get(slot+1).getInteger()); }
-  public OopHandle oopHandleAt(int slot) { return get(slot).getObject(); }
+
+  public OopHandle oopHandleAt(int slot) {
+    StackValue sv = get(slot);
+    if (sv.getType() == BasicType.getTConflict()) {
+      throw new WrongTypeException("Conflict type");
+    }
+    return sv.getObject();
+  }
+
   public float     floatAt(int slot)     { return Float.intBitsToFloat(intAt(slot)); }
   public double    doubleAt(int slot)    { return Double.longBitsToDouble(longAt(slot)); }
 }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VMReg.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VMReg.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,4 +84,8 @@
   public boolean greaterThanOrEqual(VMReg arg)  { return value >= arg.value; }
 
   public int     minus(VMReg arg)               { return value - arg.value;  }
+
+  public int reg2Stack() {
+    return value - VM.getVM().getVMRegImplInfo().getStack0().getValue();
+  }
 }
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Tue Oct 10 16:29:04 2017 +0200
@@ -34,6 +34,7 @@
 import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.tools.jcore.*;
+import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
 public class HTMLGenerator implements /* imports */ ClassConstants {
@@ -1928,11 +1929,16 @@
          }
 
          if (!method.isStatic() && !method.isNative()) {
-            OopHandle oopHandle = vf.getLocals().oopHandleAt(0);
+            try {
+               OopHandle oopHandle = vf.getLocals().oopHandleAt(0);
 
-            if (oopHandle != null) {
-               buf.append(", oop = ");
-               buf.append(oopHandle.toString());
+               if (oopHandle != null) {
+                  buf.append(", oop = ");
+                  buf.append(oopHandle.toString());
+               }
+            } catch (WrongTypeException e) {
+              // Do nothing.
+              // It might be caused by JIT'ed inline frame.
             }
          }
 
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java	Tue Oct 10 16:29:04 2017 +0200
@@ -79,9 +79,15 @@
         if ((config.vmVersionFeatures & 1L << config.sparc_DES) != 0) {
             features.add(CPUFeature.DES);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_DICTUNP) != 0) {
+            features.add(CPUFeature.DICTUNP);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_FMAF) != 0) {
             features.add(CPUFeature.FMAF);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_FPCMPSHL) != 0) {
+            features.add(CPUFeature.FPCMPSHL);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_HPC) != 0) {
             features.add(CPUFeature.HPC);
         }
@@ -94,6 +100,9 @@
         if ((config.vmVersionFeatures & 1L << config.sparc_MD5) != 0) {
             features.add(CPUFeature.MD5);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_MME) != 0) {
+            features.add(CPUFeature.MME);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_MONT) != 0) {
             features.add(CPUFeature.MONT);
         }
@@ -112,18 +121,30 @@
         if ((config.vmVersionFeatures & 1L << config.sparc_POPC) != 0) {
             features.add(CPUFeature.POPC);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_RLE) != 0) {
+            features.add(CPUFeature.RLE);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_SHA1) != 0) {
             features.add(CPUFeature.SHA1);
         }
         if ((config.vmVersionFeatures & 1L << config.sparc_SHA256) != 0) {
             features.add(CPUFeature.SHA256);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SHA3) != 0) {
+            features.add(CPUFeature.SHA3);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_SHA512) != 0) {
             features.add(CPUFeature.SHA512);
         }
         if ((config.vmVersionFeatures & 1L << config.sparc_SPARC5) != 0) {
             features.add(CPUFeature.SPARC5);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SPARC5B) != 0) {
+            features.add(CPUFeature.SPARC5B);
+        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SPARC6) != 0) {
+            features.add(CPUFeature.SPARC6);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_V9) != 0) {
             features.add(CPUFeature.V9);
         }
@@ -142,6 +163,9 @@
         if ((config.vmVersionFeatures & 1L << config.sparc_VIS3B) != 0) {
             features.add(CPUFeature.VIS3B);
         }
+        if ((config.vmVersionFeatures & 1L << config.sparc_VIS3C) != 0) {
+            features.add(CPUFeature.VIS3C);
+        }
         if ((config.vmVersionFeatures & 1L << config.sparc_XMONT) != 0) {
             features.add(CPUFeature.XMONT);
         }
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotVMConfig.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotVMConfig.java	Tue Oct 10 16:29:04 2017 +0200
@@ -55,27 +55,35 @@
     final int sparc_CBCOND   = getConstant("VM_Version::ISA_CBCOND",   Integer.class);
     final int sparc_CRC32C   = getConstant("VM_Version::ISA_CRC32C",   Integer.class);
     final int sparc_DES      = getConstant("VM_Version::ISA_DES",      Integer.class);
+    final int sparc_DICTUNP  = getConstant("VM_Version::ISA_DICTUNP",  Integer.class);
     final int sparc_FMAF     = getConstant("VM_Version::ISA_FMAF",     Integer.class);
+    final int sparc_FPCMPSHL = getConstant("VM_Version::ISA_FPCMPSHL", Integer.class);
     final int sparc_HPC      = getConstant("VM_Version::ISA_HPC",      Integer.class);
     final int sparc_IMA      = getConstant("VM_Version::ISA_IMA",      Integer.class);
     final int sparc_KASUMI   = getConstant("VM_Version::ISA_KASUMI",   Integer.class);
     final int sparc_MD5      = getConstant("VM_Version::ISA_MD5",      Integer.class);
+    final int sparc_MME      = getConstant("VM_Version::ISA_MME",      Integer.class);
     final int sparc_MONT     = getConstant("VM_Version::ISA_MONT",     Integer.class);
     final int sparc_MPMUL    = getConstant("VM_Version::ISA_MPMUL",    Integer.class);
     final int sparc_MWAIT    = getConstant("VM_Version::ISA_MWAIT",    Integer.class);
     final int sparc_PAUSE    = getConstant("VM_Version::ISA_PAUSE",    Integer.class);
     final int sparc_PAUSE_NSEC = getConstant("VM_Version::ISA_PAUSE_NSEC", Integer.class);
     final int sparc_POPC     = getConstant("VM_Version::ISA_POPC",     Integer.class);
+    final int sparc_RLE      = getConstant("VM_Version::ISA_RLE",      Integer.class);
     final int sparc_SHA1     = getConstant("VM_Version::ISA_SHA1",     Integer.class);
     final int sparc_SHA256   = getConstant("VM_Version::ISA_SHA256",   Integer.class);
+    final int sparc_SHA3     = getConstant("VM_Version::ISA_SHA3",     Integer.class);
     final int sparc_SHA512   = getConstant("VM_Version::ISA_SHA512",   Integer.class);
     final int sparc_SPARC5   = getConstant("VM_Version::ISA_SPARC5",   Integer.class);
+    final int sparc_SPARC5B  = getConstant("VM_Version::ISA_SPARC5B",  Integer.class);
+    final int sparc_SPARC6   = getConstant("VM_Version::ISA_SPARC6",   Integer.class);
     final int sparc_V9       = getConstant("VM_Version::ISA_V9",       Integer.class);
     final int sparc_VAMASK   = getConstant("VM_Version::ISA_VAMASK",   Integer.class);
     final int sparc_VIS1     = getConstant("VM_Version::ISA_VIS1",     Integer.class);
     final int sparc_VIS2     = getConstant("VM_Version::ISA_VIS2",     Integer.class);
     final int sparc_VIS3     = getConstant("VM_Version::ISA_VIS3",     Integer.class);
     final int sparc_VIS3B    = getConstant("VM_Version::ISA_VIS3B",    Integer.class);
+    final int sparc_VIS3C    = getConstant("VM_Version::ISA_VIS3C",    Integer.class);
     final int sparc_XMONT    = getConstant("VM_Version::ISA_XMONT",    Integer.class);
     final int sparc_XMPMUL   = getConstant("VM_Version::ISA_XMPMUL",   Integer.class);
 
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMemoryAccessProviderImpl.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMemoryAccessProviderImpl.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -138,21 +138,6 @@
         return true;
     }
 
-    private boolean isValidObjectFieldDisplacement(Constant base, long displacement) {
-        if (base instanceof HotSpotMetaspaceConstant) {
-            MetaspaceWrapperObject metaspaceObject = HotSpotMetaspaceConstantImpl.getMetaspaceObject(base);
-            if (metaspaceObject instanceof HotSpotResolvedObjectTypeImpl) {
-                if (displacement == runtime.getConfig().classMirrorOffset) {
-                    // Klass::_java_mirror is valid for all Klass* values
-                    return true;
-                }
-            } else {
-                throw new IllegalArgumentException(String.valueOf(metaspaceObject));
-            }
-        }
-        return false;
-    }
-
     private static long asRawPointer(Constant base) {
         if (base instanceof HotSpotMetaspaceConstantImpl) {
             MetaspaceWrapperObject meta = HotSpotMetaspaceConstantImpl.getMetaspaceObject(base);
@@ -202,7 +187,7 @@
         if (base instanceof HotSpotMetaspaceConstant) {
             MetaspaceWrapperObject metaspaceObject = HotSpotMetaspaceConstantImpl.getMetaspaceObject(base);
             if (metaspaceObject instanceof HotSpotResolvedObjectTypeImpl) {
-                if (displacement == runtime.getConfig().classMirrorOffset) {
+                if (displacement == runtime.getConfig().classMirrorHandleOffset) {
                     assert expected == ((HotSpotResolvedObjectTypeImpl) metaspaceObject).mirror();
                 }
             }
@@ -294,10 +279,18 @@
             Object o = readRawObject(base, displacement, runtime.getConfig().useCompressedOops);
             return HotSpotObjectConstantImpl.forObject(o);
         }
-        if (!isValidObjectFieldDisplacement(base, displacement)) {
-            return null;
+        if (base instanceof HotSpotMetaspaceConstant) {
+            MetaspaceWrapperObject metaspaceObject = HotSpotMetaspaceConstantImpl.getMetaspaceObject(base);
+            if (metaspaceObject instanceof HotSpotResolvedObjectTypeImpl) {
+                 if (displacement == runtime.getConfig().classMirrorHandleOffset) {
+                    // Klass::_java_mirror is valid for all Klass* values
+                    return HotSpotObjectConstantImpl.forObject(((HotSpotResolvedObjectTypeImpl) metaspaceObject).mirror());
+                 }
+             } else {
+                 throw new IllegalArgumentException(String.valueOf(metaspaceObject));
+             }
         }
-        return HotSpotObjectConstantImpl.forObject(readRawObject(base, displacement, false));
+        return null;
     }
 
     @Override
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,7 +77,7 @@
     /**
      * The offset of the _java_mirror field (of type {@link Class}) in a Klass.
      */
-    final int classMirrorOffset = getFieldOffset("Klass::_java_mirror", Integer.class, "oop");
+    final int classMirrorHandleOffset = getFieldOffset("Klass::_java_mirror", Integer.class, "OopHandle");
 
     final int klassAccessFlagsOffset = getFieldOffset("Klass::_access_flags", Integer.class, "AccessFlags");
     final int klassLayoutHelperOffset = getFieldOffset("Klass::_layout_helper", Integer.class, "jint");
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sparc/src/jdk/vm/ci/sparc/SPARC.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sparc/src/jdk/vm/ci/sparc/SPARC.java	Tue Oct 10 16:29:04 2017 +0200
@@ -344,27 +344,35 @@
         CBCOND,
         CRC32C,
         DES,
+        DICTUNP,
         FMAF,
+        FPCMPSHL,
         HPC,
         IMA,
         KASUMI,
         MD5,
+        MME,
         MONT,
         MPMUL,
         MWAIT,
         PAUSE,
         PAUSE_NSEC,
         POPC,
+        RLE,
         SHA1,
         SHA256,
+        SHA3,
         SHA512,
         SPARC5,
+        SPARC5B,
+        SPARC6,
         V9,
         VAMASK,
         VIS1,
         VIS2,
         VIS3,
         VIS3B,
+        VIS3C,
         XMONT,
         XMPMUL,
         // Synthesised CPU properties:
--- a/src/jdk.internal.vm.ci/share/classes/module-info.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.ci/share/classes/module-info.java	Tue Oct 10 16:29:04 2017 +0200
@@ -25,6 +25,9 @@
 
 module jdk.internal.vm.ci {
     exports jdk.vm.ci.services to jdk.internal.vm.compiler;
+    exports jdk.vm.ci.runtime to
+        jdk.internal.vm.compiler,
+        jdk.internal.vm.compiler.management;
 
     uses jdk.vm.ci.services.JVMCIServiceLocator;
     uses jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler.management/share/classes/module-info.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Registers Graal Compiler specific management interfaces for the JVM.
+ *
+ * @moduleGraph
+ * @since 10
+ */
+module jdk.internal.vm.compiler.management {
+    requires java.management;
+    requires jdk.management;
+    requires jdk.internal.vm.ci;
+    requires jdk.internal.vm.compiler;
+
+    provides sun.management.spi.PlatformMBeanProvider with
+        org.graalvm.compiler.hotspot.jmx.GraalMBeans;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler.management/share/classes/org/graalvm/compiler/hotspot/jmx/GraalMBeans.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.hotspot.jmx;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import jdk.vm.ci.runtime.JVMCI;
+import jdk.vm.ci.runtime.JVMCICompiler;
+import jdk.vm.ci.runtime.JVMCIRuntime;
+import org.graalvm.compiler.hotspot.HotSpotGraalCompiler;
+import sun.management.spi.PlatformMBeanProvider;
+import sun.management.spi.PlatformMBeanProvider.PlatformComponent;
+
+public final class GraalMBeans extends PlatformMBeanProvider {
+    @Override
+    public List<PlatformComponent<?>> getPlatformComponentList() {
+        List<PlatformComponent<?>> components = new ArrayList<>();
+        try {
+            Object bean = findGraalRuntimeBean();
+            if (bean != null) {
+                components.add(new HotSpotRuntimeMBeanComponent(bean));
+            }
+        } catch (InternalError | LinkageError err) {
+            // go on and ignore
+        }
+        return components;
+    }
+
+    public static Object findGraalRuntimeBean() {
+        JVMCIRuntime r = JVMCI.getRuntime();
+        JVMCICompiler c = r.getCompiler();
+        if (c instanceof HotSpotGraalCompiler) {
+            return ((HotSpotGraalCompiler) c).mbean();
+        }
+        return null;
+    }
+
+    private static final class HotSpotRuntimeMBeanComponent implements PlatformComponent<Object> {
+
+        private final String name;
+        private final Object mbean;
+
+        HotSpotRuntimeMBeanComponent(Object mbean) {
+            this.name = "org.graalvm.compiler.hotspot:type=Options";
+            this.mbean = mbean;
+        }
+
+        @Override
+        public Set<Class<?>> mbeanInterfaces() {
+            return Collections.emptySet();
+        }
+
+        @Override
+        public Set<String> mbeanInterfaceNames() {
+            return Collections.emptySet();
+        }
+
+        @Override
+        public String getObjectNamePattern() {
+            return name;
+        }
+
+        @Override
+        public Map<String, Object> nameToMBeanMap() {
+            return Collections.<String, Object>singletonMap(name, mbean);
+        }
+    }
+}
--- a/src/jdk.internal.vm.compiler/share/classes/module-info.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.compiler/share/classes/module-info.java	Tue Oct 10 16:29:04 2017 +0200
@@ -50,7 +50,9 @@
     exports org.graalvm.compiler.core.target            to jdk.aot;
     exports org.graalvm.compiler.debug                  to jdk.aot;
     exports org.graalvm.compiler.graph                  to jdk.aot;
-    exports org.graalvm.compiler.hotspot                to jdk.aot;
+    exports org.graalvm.compiler.hotspot                to
+        jdk.aot,
+        jdk.internal.vm.compiler.management;
     exports org.graalvm.compiler.hotspot.meta           to jdk.aot;
     exports org.graalvm.compiler.hotspot.replacements   to jdk.aot;
     exports org.graalvm.compiler.hotspot.stubs          to jdk.aot;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/GraalHotSpotVMConfig.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/GraalHotSpotVMConfig.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -280,14 +280,14 @@
         }
         if (offset == -1) {
             try {
-                offset = getFieldOffset(name, Integer.class, "jobject");
+                offset = getFieldOffset(name, Integer.class, "OopHandle");
                 isHandle = true;
             } catch (JVMCIError e) {
 
             }
         }
         if (offset == -1) {
-            throw new JVMCIError("cannot get offset of field " + name + " with type oop or jobject");
+            throw new JVMCIError("cannot get offset of field " + name + " with type oop or OopHandle");
         }
         classMirrorOffset = offset;
         classMirrorIsHandle = isHandle;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalCompiler.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalCompiler.java	Tue Oct 10 16:29:04 2017 +0200
@@ -282,6 +282,13 @@
         return suite;
     }
 
+    public Object mbean() {
+        if (graalRuntime instanceof HotSpotGraalRuntime) {
+            return ((HotSpotGraalRuntime)graalRuntime).mbean();
+        }
+        return null;
+    }
+
     /**
      * Converts {@code method} to a String with {@link JavaMethod#format(String)} and the format
      * string {@code "%H.%n(%p)"}.
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalRuntime.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalRuntime.java	Tue Oct 10 16:29:04 2017 +0200
@@ -316,4 +316,8 @@
     public Map<ExceptionAction, Integer> getCompilationProblemsPerAction() {
         return compilationProblemsPerAction;
     }
+
+    final Object mbean() {
+        return mBean;
+    }
 }
--- a/src/jdk.jdwp.agent/share/native/libjdwp/debugInit.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.jdwp.agent/share/native/libjdwp/debugInit.c	Tue Oct 10 16:29:04 2017 +0200
@@ -1301,6 +1301,9 @@
 {
     enum exit_codes { EXIT_NO_ERRORS = 0, EXIT_JVMTI_ERROR = 1, EXIT_TRANSPORT_ERROR = 2 };
 
+    // Release commandLoop vmDeathLock if necessary
+    commandLoop_exitVmDeathLockOnError();
+
     // Prepare to exit. Log error and finish logging
     LOG_MISC(("Exiting with error %s(%d): %s", jvmtiErrorText(error), error,
                                                ((msg == NULL) ? "" : msg)));
--- a/src/jdk.jdwp.agent/share/native/libjdwp/eventHandler.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.jdwp.agent/share/native/libjdwp/eventHandler.c	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1287,11 +1287,11 @@
     } debugMonitorExit(callbackBlock);
 
     /*
-     * The VM will die soon after the completion of this callback - we
-     * may need to do a final synchronization with the command loop to
-     * avoid the VM terminating with replying to the final (resume)
-     * command.
+     * The VM will die soon after the completion of this callback -
+     * we synchronize with both the command loop and the debug loop
+     * for a more orderly shutdown.
      */
+    commandLoop_sync();
     debugLoop_sync();
 
     LOG_MISC(("END cbVMDeath"));
--- a/src/jdk.jdwp.agent/share/native/libjdwp/eventHelper.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.jdwp.agent/share/native/libjdwp/eventHelper.c	Tue Oct 10 16:29:04 2017 +0200
@@ -29,6 +29,9 @@
 #include "threadControl.h"
 #include "invoker.h"
 
+
+#define COMMAND_LOOP_THREAD_NAME "JDWP Event Helper Thread"
+
 /*
  * Event helper thread command commandKinds
  */
@@ -121,6 +124,9 @@
 static jrawMonitorID commandQueueLock;
 static jrawMonitorID commandCompleteLock;
 static jrawMonitorID blockCommandLoopLock;
+static jrawMonitorID vmDeathLock;
+static volatile jboolean commandLoopEnteredVmDeathLock = JNI_FALSE;
+
 static jint maxQueueSize = 50 * 1024; /* TO DO: Make this configurable */
 static jboolean holdEvents;
 static jint currentQueueSize = 0;
@@ -700,9 +706,15 @@
              * handleCommand() to prevent any races.
              */
             jboolean doBlock = needBlockCommandLoop(command);
-            log_debugee_location("commandLoop(): command being handled", NULL, NULL, 0);
-            handleCommand(jni_env, command);
+            debugMonitorEnter(vmDeathLock);
+            commandLoopEnteredVmDeathLock = JNI_TRUE;
+            if (!gdata->vmDead) {
+                log_debugee_location("commandLoop(): command being handled", NULL, NULL, 0);
+                handleCommand(jni_env, command);
+            }
             completeCommand(command);
+            debugMonitorExit(vmDeathLock);
+            commandLoopEnteredVmDeathLock = JNI_FALSE;
             /* if we just finished a suspend-all cmd, then we block here */
             if (doBlock) {
                 doBlockCommandLoop();
@@ -725,10 +737,11 @@
     commandQueueLock = debugMonitorCreate("JDWP Event Helper Queue Monitor");
     commandCompleteLock = debugMonitorCreate("JDWP Event Helper Completion Monitor");
     blockCommandLoopLock = debugMonitorCreate("JDWP Event Block CommandLoop Monitor");
+    vmDeathLock = debugMonitorCreate("JDWP VM_DEATH CommandLoop Monitor");
 
     /* Start the event handler thread */
     func = &commandLoop;
-    (void)spawnNewThread(func, NULL, "JDWP Event Helper Thread");
+    (void)spawnNewThread(func, NULL, COMMAND_LOOP_THREAD_NAME);
 }
 
 void
@@ -759,6 +772,42 @@
     debugMonitorExit(commandQueueLock);
 }
 
+void commandLoop_exitVmDeathLockOnError()
+{
+    const char* MSG_BASE = "exitVmDeathLockOnError: error in JVMTI %s: %d\n";
+    jthread cur_thread = NULL;
+    jvmtiThreadInfo thread_info;
+    jvmtiError err = JVMTI_ERROR_NONE;
+
+    err = JVMTI_FUNC_PTR(gdata->jvmti, GetCurrentThread)
+              (gdata->jvmti, &cur_thread);
+    if (err != JVMTI_ERROR_NONE) {
+        LOG_ERROR((MSG_BASE, "GetCurrentThread", err));
+        return;
+    }
+
+    err = JVMTI_FUNC_PTR(gdata->jvmti, GetThreadInfo)
+              (gdata->jvmti, cur_thread, &thread_info);
+    if (err != JVMTI_ERROR_NONE) {
+        LOG_ERROR((MSG_BASE, "GetThreadInfo", err));
+        return;
+    }
+    if (strcmp(thread_info.name, COMMAND_LOOP_THREAD_NAME) != 0) {
+        return;
+    }
+    if (commandLoopEnteredVmDeathLock == JNI_TRUE) {
+        debugMonitorExit(vmDeathLock);
+        commandLoopEnteredVmDeathLock = JNI_FALSE;
+    }
+}
+
+void
+commandLoop_sync(void)
+{
+    debugMonitorEnter(vmDeathLock);
+    debugMonitorExit(vmDeathLock);
+}
+
 /* Change all references to global in the EventInfo struct */
 static void
 saveEventInfoRefs(JNIEnv *env, EventInfo *evinfo)
--- a/src/jdk.jdwp.agent/share/native/libjdwp/eventHelper.h	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.jdwp.agent/share/native/libjdwp/eventHelper.h	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,6 +54,9 @@
 void eventHelper_lock(void);
 void eventHelper_unlock(void);
 
+void commandLoop_sync(void); /* commandLoop sync with cbVMDeath */
+void commandLoop_exitVmDeathLockOnError(void);
+
 /*
  * Private interface for coordinating between eventHelper.c: commandLoop()
  * and ThreadReferenceImpl.c: resume() and VirtualMachineImpl.c: resume().
--- a/src/jdk.management/share/native/libmanagement_ext/management_ext.c	Thu Oct 05 18:29:47 2017 +0100
+++ b/src/jdk.management/share/native/libmanagement_ext/management_ext.c	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,7 @@
         return JNI_ERR;
     }
 
-    jmm_interface = (JmmInterface*) JVM_GetManagement(JMM_VERSION_1_0);
+    jmm_interface = (JmmInterface*) JVM_GetManagement(JMM_VERSION);
     if (jmm_interface == NULL) {
         JNU_ThrowInternalError(env, "Unsupported Management version");
         return JNI_ERR;
--- a/test/hotspot/jtreg/ProblemList.txt	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/ProblemList.txt	Tue Oct 10 16:29:04 2017 +0200
@@ -64,6 +64,7 @@
 gc/g1/humongousObjects/TestHeapCounters.java 8178918 generic-all
 gc/stress/gclocker/TestGCLockerWithG1.java 8179226 generic-all
 gc/survivorAlignment/TestPromotionFromSurvivorToTenuredAfterMinorGC.java 8177765 generic-all
+gc/logging/TestPrintReferences.java 8188245 generic-all
 
 #############################################################################
 
--- a/test/hotspot/jtreg/TEST.ROOT	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/TEST.ROOT	Tue Oct 10 16:29:04 2017 +0200
@@ -52,7 +52,8 @@
     vm.rtm.cpu \
     vm.rtm.os \
     vm.aot \
-    vm.cds
+    vm.cds \
+    vm.graal.enabled
 
 # Minimum jtreg version
 requiredVersion=4.2 b08
--- a/test/hotspot/jtreg/compiler/aot/AotCompiler.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/aot/AotCompiler.java	Tue Oct 10 16:29:04 2017 +0200
@@ -145,13 +145,37 @@
                 + " [-compile <compileItems>]* [-extraopt <java option>]*");
     }
 
+    // runs ld -v (or ld -V on Solaris) and checks its exit code
+    private static boolean checkLd(Path bin) {
+        try {
+            return 0 == ProcessTools.executeCommand(bin.toString(),
+                                                    Platform.isSolaris() ? "-V" : "-v")
+                                    .getExitValue();
+        } catch (Throwable t) {
+            // any errors mean ld doesn't work
+            return false;
+        }
+    }
+
     public static String resolveLinker() {
         Path linker = null;
-        // 1st, check if PATH has ld
-        for (String path : System.getenv("PATH").split(File.pathSeparator)) {
-            if (Files.exists(Paths.get(path).resolve("ld"))) {
-                // there is ld in PATH, jaotc is supposed to find it by its own
-                return null;
+        // on non-Windows platforms, first check if PATH has ld
+        if (!Platform.isWindows()) {
+            String bin = "ld";
+            for (String path : System.getenv("PATH").split(File.pathSeparator)) {
+                Path ld = Paths.get(path).resolve("ld");
+                if (Files.exists(ld)) {
+                    // there is ld in PATH
+                    if (checkLd(ld)) {
+                        System.out.println("found working linker: " + ld);
+                        // ld works, jaotc is supposed to find and use it
+                        return null;
+                    } else {
+                        System.out.println("found broken linker: " + ld);
+                        // ld exists in PATH, but doesn't work, have to use devkit
+                        break;
+                    }
+                }
             }
         }
         // there is no ld in PATH, will use ld from devkit
@@ -275,7 +299,9 @@
                 }
             }
         } catch (FileNotFoundException e) {
-            throw new Error("artifact resolution error: " + e, e);
+            System.err.println("artifact resolution error: " + e);
+            // let jaotc try to find linker
+            return null;
         }
         if (linker != null) {
             return linker.toAbsolutePath().toString();
--- a/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Tue Oct 10 16:29:04 2017 +0200
@@ -25,7 +25,7 @@
  * @test
  * @bug 8072016
  * @summary Infinite deoptimization/recompilation cycles in case of arraycopy with tightly coupled allocation
- * @requires vm.flavor == "server" & !vm.emulatedClient
+ * @requires vm.flavor == "server" & !vm.emulatedClient & !vm.graal.enabled
  * @library /test/lib /
  * @modules java.base/jdk.internal.misc
  *          java.management
--- a/test/hotspot/jtreg/compiler/c2/Test8004741.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/c2/Test8004741.java	Tue Oct 10 16:29:04 2017 +0200
@@ -26,6 +26,7 @@
  * @bug 8004741
  * @summary Missing compiled exception handle table entry for multidimensional array allocation
  *
+ * @requires !vm.graal.enabled
  * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
  *    -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers
  *    -XX:+SafepointALot -XX:GuaranteedSafepointInterval=100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/ciReplay/TestDumpReplay.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc:+open
+ * @build sun.hotspot.WhiteBox
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *                   -Xbatch -XX:-TieredCompilation -XX:+AlwaysIncrementalInline
+ *                   -XX:CompileCommand=compileonly,compiler.ciReplay.TestDumpReplay::*
+ *                   compiler.ciReplay.TestDumpReplay
+ */
+
+package compiler.ciReplay;
+
+import sun.hotspot.WhiteBox;
+
+public class TestDumpReplay {
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+
+    private static final String emptyString;
+
+    static {
+        emptyString = "";
+    }
+
+    public static void m1() {
+        m2();
+    }
+
+    public static void m2() {
+        m3();
+    }
+
+    public static void m3() {
+
+    }
+
+    public static void main(String[] args) {
+        // Add compiler control directive to force generation of replay file
+        String directive = "[{ match: \"*.*\", DumpReplay: true }]";
+        if (WHITE_BOX.addCompilerDirective(directive) != 1) {
+            throw new RuntimeException("Failed to add compiler directive");
+        }
+
+        // Trigger compilation of m1
+        for (int i = 0; i < 10_000; ++i) {
+            m1();
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/classUnloading/anonymousClass/TestAnonymousClassUnloading.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/classUnloading/anonymousClass/TestAnonymousClassUnloading.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,13 +22,16 @@
  */
 
 /*
- * @test TestAnonymousClassUnloading
+ * @test
  * @bug 8054402
  * @summary "Tests unloading of anonymous classes."
  * @library /test/lib /
  * @modules java.base/jdk.internal.misc
+ * @build sun.hotspot.WhiteBox
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
  *
- * @run main/othervm/bootclasspath -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
  *      -XX:-BackgroundCompilation
  *      compiler.classUnloading.anonymousClass.TestAnonymousClassUnloading
  */
--- a/test/hotspot/jtreg/compiler/compilercontrol/jcmd/PrintDirectivesTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/compilercontrol/jcmd/PrintDirectivesTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -27,7 +27,7 @@
  * @summary Tests jcmd to be able to add a directive to compile only specified methods
  * @modules java.base/jdk.internal.misc
  * @library /test/lib /
- * @requires vm.flavor != "minimal"
+ * @requires vm.flavor != "minimal" & !vm.graal.enabled
  *
  * @build sun.hotspot.WhiteBox
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox
--- a/test/hotspot/jtreg/compiler/compilercontrol/logcompilation/LogTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/compilercontrol/logcompilation/LogTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -25,6 +25,8 @@
  * @test
  * @bug 8137167
  * @summary Tests LogCompilation executed standalone without log commands or directives
+ *
+ * @requires !vm.graal.enabled
  * @modules java.base/jdk.internal.misc
  * @library /test/lib /
  *
--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java	Tue Oct 10 16:29:04 2017 +0200
@@ -26,7 +26,7 @@
  * @library /test/lib /
  * @modules java.base/jdk.internal.misc
  *          java.management
- * @requires vm.cpu.features ~= ".*aes.*"
+ * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled
  * @build sun.hotspot.WhiteBox
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox
  *                                sun.hotspot.WhiteBox$WhiteBoxPermission
--- a/test/hotspot/jtreg/compiler/intrinsics/IntrinsicDisabledTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/intrinsics/IntrinsicDisabledTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -24,6 +24,8 @@
 /*
  * @test
  * @bug 8138651
+ *
+ * @requires !vm.graal.enabled
  * @modules java.base/jdk.internal.misc
  * @library /test/lib /
  *
--- a/test/hotspot/jtreg/compiler/intrinsics/klass/CastNullCheckDroppingsTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/intrinsics/klass/CastNullCheckDroppingsTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -25,7 +25,7 @@
  * @test NullCheckDroppingsTest
  * @bug 8054492
  * @summary Casting can result in redundant null checks in generated code
- * @requires vm.flavor == "server" & !vm.emulatedClient
+ * @requires vm.flavor == "server" & !vm.emulatedClient & !vm.graal.enabled
  * @library /test/lib
  * @modules java.base/jdk.internal.misc
  *          java.management
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java	Tue Oct 10 16:29:04 2017 +0200
@@ -42,7 +42,8 @@
                               new OrPredicate(Platform::isAArch64,
                               new OrPredicate(Platform::isS390x,
                               new OrPredicate(Platform::isSparc,
-                              new OrPredicate(Platform::isX64, Platform::isX86))))));
+                              new OrPredicate(Platform::isPPC,
+                              new OrPredicate(Platform::isX64, Platform::isX86)))))));
     }
 
     @Override
--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -110,7 +110,7 @@
         test(asm -> {
             ResolvedJavaType type = metaAccess.lookupJavaType(getConstClass());
             Register klass = asm.emitLoadPointer((HotSpotConstant) constantReflection.asObjectHub(type));
-            Register ret = asm.emitLoadPointer(klass, config.classMirrorOffset);
+            Register ret = asm.emitLoadPointer(asm.emitLoadPointer(klass, config.classMirrorHandleOffset), 0);
             asm.emitPointerRet(ret);
         });
     }
@@ -123,7 +123,7 @@
             HotSpotConstant hub = (HotSpotConstant) constantReflection.asObjectHub(type);
             Register narrowKlass = asm.emitLoadPointer((HotSpotConstant) hub.compress());
             Register klass = asm.emitUncompressPointer(narrowKlass, config.narrowKlassBase, config.narrowKlassShift);
-            Register ret = asm.emitLoadPointer(klass, config.classMirrorOffset);
+            Register ret = asm.emitLoadPointer(asm.emitLoadPointer(klass, config.classMirrorHandleOffset), 0);
             asm.emitPointerRet(ret);
         });
     }
@@ -135,7 +135,7 @@
             HotSpotConstant hub = (HotSpotConstant) constantReflection.asObjectHub(type);
             DataSectionReference ref = asm.emitDataItem(hub);
             Register klass = asm.emitLoadPointer(ref);
-            Register ret = asm.emitLoadPointer(klass, config.classMirrorOffset);
+            Register ret = asm.emitLoadPointer(asm.emitLoadPointer(klass, config.classMirrorHandleOffset), 0);
             asm.emitPointerRet(ret);
         });
     }
@@ -150,7 +150,7 @@
             DataSectionReference ref = asm.emitDataItem(narrowHub);
             Register narrowKlass = asm.emitLoadNarrowPointer(ref);
             Register klass = asm.emitUncompressPointer(narrowKlass, config.narrowKlassBase, config.narrowKlassShift);
-            Register ret = asm.emitLoadPointer(klass, config.classMirrorOffset);
+            Register ret = asm.emitLoadPointer(asm.emitLoadPointer(klass, config.classMirrorHandleOffset), 0);
             asm.emitPointerRet(ret);
         });
     }
--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
     public final long narrowKlassBase = getFieldValue("CompilerToVM::Data::Universe_narrow_klass_base", Long.class, "address");
     public final int narrowKlassShift = getFieldValue("CompilerToVM::Data::Universe_narrow_klass_shift", Integer.class, "int");
 
-    public final int classMirrorOffset = getFieldOffset("Klass::_java_mirror", Integer.class, "oop");
+    public final int classMirrorHandleOffset = getFieldOffset("Klass::_java_mirror", Integer.class, "OopHandle");
 
     public final int MARKID_DEOPT_HANDLER_ENTRY = getConstant("CodeInstaller::DEOPT_HANDLER_ENTRY", Integer.class);
     public final long handleDeoptStub = getFieldValue("CompilerToVM::Data::SharedRuntime_deopt_blob_unpack", Long.class, "address");
--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.hotspot.test/src/jdk/vm/ci/hotspot/test/MemoryAccessProviderData.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.hotspot.test/src/jdk/vm/ci/hotspot/test/MemoryAccessProviderData.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,7 +64,7 @@
     @DataProvider(name = "positiveObject")
     public static Object[][] getPositiveObjectJavaKind() {
         HotSpotJVMCIRuntimeProvider runtime = (HotSpotJVMCIRuntimeProvider) JVMCI.getRuntime();
-        int offset = new HotSpotVMConfigAccess(runtime.getConfigStore()).getFieldOffset("Klass::_java_mirror", Integer.class, "oop");
+        int offset = new HotSpotVMConfigAccess(runtime.getConfigStore()).getFieldOffset("Klass::_java_mirror", Integer.class, "OopHandle");
         Constant wrappedKlassPointer = ((HotSpotResolvedObjectType) runtime.fromClass(TestClass.class)).klass();
         return new Object[][]{new Object[]{JavaKind.Object, wrappedKlassPointer, (long) offset, TEST_CLASS_CONSTANT, 0}};
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/loopopts/TestCMovSplitThruPhi.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8187822
+ * @summary C2 conditional move optimization might create broken graph
+ * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:CompileCommand=dontinline,TestCMovSplitThruPhi::not_inlined -XX:CompileOnly=TestCMovSplitThruPhi::test -XX:-LoopUnswitching TestCMovSplitThruPhi
+ *
+ */
+
+public class TestCMovSplitThruPhi {
+    static int f;
+
+    static int test(boolean flag1, boolean flag2, boolean flag3, boolean flag4) {
+        int v3 = 0;
+        if (flag4) {
+            for (int i = 0; i < 10; i++) {
+                int v1 = 0;
+                if (flag1) {
+                    v1 = not_inlined();
+                }
+                // AddI below will be candidate for split through Phi
+                int v2 = v1;
+                if (flag2) {
+                    v2 = f + v1;
+                }
+                // test above will be converted to CMovI
+                if (flag3) {
+                    v3 = v2 * 2;
+                    break;
+                }
+            }
+        }
+        return v3;
+    }
+
+    private static int not_inlined() {
+        return 0;
+    }
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20000; i++) {
+            test((i % 2) == 0, (i % 2) == 0, (i % 100) == 1, (i % 1000) == 1);
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -28,7 +28,7 @@
  * @summary Test that C2 flag UseCountedLoopSafepoints ensures a safepoint is kept in a CountedLoop
  * @library /test/lib /
  * @requires vm.compMode != "Xint" & vm.flavor == "server" & (vm.opt.TieredStopAtLevel == null | vm.opt.TieredStopAtLevel == 4) & vm.debug == true
- * @requires !vm.emulatedClient
+ * @requires !vm.emulatedClient & !vm.graal.enabled
  * @modules java.base/jdk.internal.misc
  * @build sun.hotspot.WhiteBox
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox
--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java	Tue Oct 10 16:29:04 2017 +0200
@@ -71,23 +71,27 @@
             = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256"       }, null),
               new OrPredicate(new CPUSpecificPredicate("s390.*",    new String[] { "sha256"       }, null),
               new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha256"       }, null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"          }, null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha"          }, null),
               // x86 variants
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("i386.*",    new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("x86.*",     new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "avx2", "bmi2" }, null),
-                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))));
+                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))))));
 
     public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE
             = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512"       }, null),
               new OrPredicate(new CPUSpecificPredicate("s390.*",    new String[] { "sha512"       }, null),
               new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha512"       }, null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"          }, null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha"          }, null),
               // x86 variants
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("i386.*",    new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("x86.*",     new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "avx2", "bmi2" }, null),
-                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))));
+                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))))));
 
     public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE
             = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,
--- a/test/hotspot/jtreg/gc/logging/TestPrintReferences.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/gc/logging/TestPrintReferences.java	Tue Oct 10 16:29:04 2017 +0200
@@ -23,7 +23,7 @@
 
 /*
  * @test TestPrintReferences
- * @bug 8136991 8186402
+ * @bug 8136991 8186402 8186465
  * @summary Validate the reference processing logging
  * @key gc
  * @library /test/lib
@@ -36,36 +36,58 @@
 
 import jdk.test.lib.process.OutputAnalyzer;
 import jdk.test.lib.process.ProcessTools;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 public class TestPrintReferences {
+  static String output;
+  static final String doubleRegex = "[0-9]+[.,][0-9]+";
+  static final String referenceProcessing = "Reference Processing";
+  static final String softReference = "SoftReference";
+  static final String weakReference = "WeakReference";
+  static final String finalReference = "FinalReference";
+  static final String phantomReference = "PhantomReference";
+  static final String phase1 = "Phase1";
+  static final String phase2 = "Phase2";
+  static final String phase3 = "Phase3";
+  static final String gcLogTimeRegex = ".* GC\\([0-9]+\\) ";
+
   public static void main(String[] args) throws Exception {
     ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder("-Xlog:gc+phases+ref=debug",
                                                                       "-XX:+UseG1GC",
-                                                                      "-Xmx10M",
+                                                                      "-Xmx32M",
                                                                       // Explicit thread setting is required to avoid using only 1 thread
                                                                       "-XX:ParallelGCThreads=2",
                                                                       GCTest.class.getName());
     OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start());
 
-    String indent_4 = "    ";
-    String indent_6 = "      ";
-    String indent_8 = "        ";
-    String gcLogTimeRegex = ".* GC\\([0-9]+\\) ";
+    checkLogFormat(output);
+    checkLogValue(output);
+
+    output.shouldHaveExitValue(0);
+  }
+
+  static String indent(int count) {
+    return " {" + count + "}";
+  }
+
+  // Find the first Reference Processing log and check its format.
+  public static void checkLogFormat(OutputAnalyzer output) {
     String countRegex = "[0-9]+";
-    String timeRegex = "[0-9]+[.,][0-9]+ms";
-    String totalRegex = gcLogTimeRegex + indent_4 + "Reference Processing: " + timeRegex + "\n";
-    String balanceRegex = gcLogTimeRegex + indent_8 + "Balance queues: " + timeRegex + "\n";
-    String softRefRegex = gcLogTimeRegex + indent_6 + "SoftReference: " + timeRegex + "\n";
-    String weakRefRegex = gcLogTimeRegex + indent_6 + "WeakReference: " + timeRegex + "\n";
-    String finalRefRegex = gcLogTimeRegex + indent_6 + "FinalReference: " + timeRegex + "\n";
-    String phantomRefRegex = gcLogTimeRegex + indent_6 + "PhantomReference: " + timeRegex + "\n";
-    String refDetailRegex = gcLogTimeRegex + indent_8 + "Phase2: " + timeRegex + "\n" +
-                            gcLogTimeRegex + indent_8 + "Phase3: " + timeRegex + "\n" +
-                            gcLogTimeRegex + indent_8 + "Discovered: " + countRegex + "\n" +
-                            gcLogTimeRegex + indent_8 + "Cleared: " + countRegex + "\n";
-    String softRefDetailRegex = gcLogTimeRegex + indent_8 + "Phase1: " + timeRegex + "\n" + refDetailRegex;
-    String enqueueRegex = gcLogTimeRegex + indent_4 + "Reference Enqueuing: " + timeRegex + "\n";
-    String enqueueDetailRegex = gcLogTimeRegex + indent_6 + "Reference Counts:  Soft: " + countRegex +
+    String timeRegex = doubleRegex + "ms";
+    String totalRegex = gcLogTimeRegex + indent(4) + referenceProcessing + ": " + timeRegex + "\n";
+    String balanceRegex = gcLogTimeRegex + indent(8) + "Balance queues: " + timeRegex + "\n";
+    String softRefRegex = gcLogTimeRegex + indent(6) + softReference + ": " + timeRegex + "\n";
+    String weakRefRegex = gcLogTimeRegex + indent(6) + weakReference + ": " + timeRegex + "\n";
+    String finalRefRegex = gcLogTimeRegex + indent(6) + finalReference + ": " + timeRegex + "\n";
+    String phantomRefRegex = gcLogTimeRegex + indent(6) + phantomReference + ": " + timeRegex + "\n";
+    String refDetailRegex = gcLogTimeRegex + indent(8) + phase2 + ": " + timeRegex + "\n" +
+                            gcLogTimeRegex + indent(8) + phase3 + ": " + timeRegex + "\n" +
+                            gcLogTimeRegex + indent(8) + "Discovered: " + countRegex + "\n" +
+                            gcLogTimeRegex + indent(8) + "Cleared: " + countRegex + "\n";
+    String softRefDetailRegex = gcLogTimeRegex + indent(8) + phase1 + ": " + timeRegex + "\n" + refDetailRegex;
+    String enqueueRegex = gcLogTimeRegex + indent(4) + "Reference Enqueuing: " + timeRegex + "\n";
+    String enqueueDetailRegex = gcLogTimeRegex + indent(6) + "Reference Counts:  Soft: " + countRegex +
                                 "  Weak: " + countRegex + "  Final: " + countRegex + "  Phantom: " + countRegex + "\n";
 
     output.shouldMatch(/* Total Reference processing time */
@@ -83,22 +105,90 @@
                          /* Enqueued Stats */
                        enqueueDetailRegex
                        );
+  }
 
-    output.shouldHaveExitValue(0);
+  // After getting time value, update 'output' for next use.
+  public static double getTimeValue(String name, int indentCount) {
+    // Pattern of 'name', 'value' and some extra strings.
+    String patternString = gcLogTimeRegex + indent(indentCount) + name + ": " + "(" + doubleRegex + ")";
+    Matcher m = Pattern.compile(patternString).matcher(output);
+    if (!m.find()) {
+      throw new RuntimeException("Could not find time log for " + patternString);
+    }
+
+    String match = m.group();
+    String value = m.group(1);
+
+    double result = Double.parseDouble(value);
+
+    int index = output.indexOf(match);
+    if (index != -1) {
+      output = output.substring(index, output.length());
+    }
+
+    return result;
+  }
+
+  // Reference log is printing 1 decimal place of elapsed time.
+  // So sum of each sub-phases could be slightly larger than the enclosing phase in some cases.
+  // As the maximum of sub-phases is 3, allow 0.1 of TOLERANCE.
+  // e.g. Actual value:  SoftReference(5.55) = phase1(1.85) + phase2(1.85) + phase3(1.85)
+  //      Log value:     SoftReference(5.6) = phase1(1.9) + phase2(1.9) + phase3(1.9)
+  //      When checked:  5.6 < 5.7 (sum of phase1~3)
+  public static boolean approximatelyEqual(double a, double b) {
+    final double TOLERANCE = 0.1;
+
+    return Math.abs(a - b) <= TOLERANCE;
+  }
+
+  // Return true if 'total' is larger than 'refTime' and they are not approximately equal.
+  public static boolean compare(double refTime, double total) {
+    return (refTime < total) && (!approximatelyEqual(refTime, total));
+  }
+
+  public static double checkRefTime(String refType) {
+    double refTime = getTimeValue(refType, 2);
+    double total = 0.0;
+
+    if (softReference.equals(refType)) {
+      total += getTimeValue(phase1, 4);
+    }
+    total += getTimeValue(phase2, 4);
+    total += getTimeValue(phase3, 4);
+
+    if (compare(refTime, total)) {
+      throw new RuntimeException(refType + " time(" + refTime +
+                                 "ms) is less than the sum(" + total + "ms) of each phase");
+    }
+
+    return refTime;
+  }
+
+  // Find the first concurrent Reference Processing log and compare sub-time vs. total.
+  public static void checkLogValue(OutputAnalyzer out) {
+    output = out.getStdout();
+
+    double refProcTime = getTimeValue(referenceProcessing, 0);
+
+    double total = 0.0;
+    total += checkRefTime(softReference);
+    total += checkRefTime(weakReference);
+    total += checkRefTime(finalReference);
+    total += checkRefTime(phantomReference);
+
+    if (compare(refProcTime, total)) {
+      throw new RuntimeException("Reference Processing time(" + refProcTime + "ms) is less than the sum("
+                                 + total + "ms) of each phase");
+    }
   }
 
   static class GCTest {
-    static final int M = 1024 * 1024;
+    static final int SIZE = 512 * 1024;
+    static Object[] dummy = new Object[SIZE];
 
     public static void main(String [] args) {
-
-      ArrayList arrSoftRefs = new ArrayList();
-
-      // Populate to triger GC and then Reference related logs will be printed.
-      for (int i = 0; i < 10; i++) {
-        byte[] tmp = new byte[M];
-
-        arrSoftRefs.add(new SoftReference(tmp));
+      for (int i = 0; i < SIZE; i++) {
+        dummy[i] = new SoftReference<>(new Object());
       }
     }
   }
--- a/test/hotspot/jtreg/runtime/CommandLine/VMDeprecatedOptions.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/runtime/CommandLine/VMDeprecatedOptions.java	Tue Oct 10 16:29:04 2017 +0200
@@ -46,6 +46,7 @@
         {"MaxRAMFraction",            "8"},
         {"MinRAMFraction",            "2"},
         {"InitialRAMFraction",        "64"},
+        {"AssumeMP",                  "false"},
 
         // deprecated alias flags (see also aliased_jvm_flags):
         {"DefaultMaxRAMFraction", "4"},
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/common/C.jasm	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+// If this file was written in java then some of the tests would fail during
+// compilation with errors such as:
+//   class C inherits unrelated defaults for m() from types I and J
+//   C is not abstract and does not override abstract method m() in I
+
+super public class C implements I, J version 52:0 {
+
+    public Method "<init>":"()V" stack 1 locals 1 {
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+    }
+
+} // end Class C
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/common/Foo.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class Foo {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/common/J.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public interface J {
+    public default Foo m() { return null; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/common/PreemptingClassLoader.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.io.*;
+
+public class PreemptingClassLoader extends ClassLoader {
+
+    private final Set<String> names = new HashSet<>();
+
+    public PreemptingClassLoader(String... names) {
+        for (String n : names) this.names.add(n);
+    }
+
+    protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+        if (!names.contains(name)) return super.loadClass(name, resolve);
+        Class<?> result = findLoadedClass(name);
+        if (result == null) {
+            String filename = name.replace('.', '/') + ".class";
+            try (InputStream data = getResourceAsStream(filename)) {
+                if (data == null) throw new ClassNotFoundException();
+                try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
+                    int b;
+                    do {
+                        b = data.read();
+                        if (b >= 0) buffer.write(b);
+                    } while (b >= 0);
+                    byte[] bytes = buffer.toByteArray();
+                    result = defineClass(name, bytes, 0, bytes.length);
+                }
+            } catch (IOException e) {
+                throw new ClassNotFoundException("Error reading class file", e);
+            }
+        }
+        if (resolve) resolveClass(result);
+        return result;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableICCE/I.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public interface I {
+    public default Foo m() { return null; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableICCE/Task.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class Task implements Runnable {
+
+    public void run() {
+        Class<?> c = Foo.class; // forces PreemptingClassLoader to load Foo
+        C x = new C(); // should not trigger loader constraints exception
+        x.m();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableICCE/Test.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8186092
+ * @compile ../common/Foo.java
+ *          I.java
+ *          ../common/J.java
+ *          ../common/C.jasm
+ *          Task.java
+ *          ../common/PreemptingClassLoader.java
+ * @run main/othervm Test
+ */
+
+public class Test {
+
+    // Test that LinkageError exceptions are not thrown during itable creation,
+    // for loader constraint errors, if the target method is an overpass method.
+    //
+    // In this test, during itable creation for class C, method "m()LFoo;" for
+    // C's super interface I has a different class Foo than the selected method's
+    // type J.  But, the selected method is an overpass method (that throws an
+    // ICCE). So, no LinkageError exception should be thrown because the loader
+    // constraint check that would cause the LinkageError should not be done.
+    public static void main(String... args) throws Exception {
+        Class<?> c = Foo.class; // forces standard class loader to load Foo
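+        // PreemptingClassLoader defines the named classes itself from their
+        // class files and delegates every other name to its parent loader.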
+        ClassLoader l = new PreemptingClassLoader("Task", "Foo", "C", "I");
+        Runnable r = (Runnable) l.loadClass("Task").newInstance();
+        try {
+            r.run(); // Cause an ICCE because both I and J define m()LFoo;
+            throw new RuntimeException("Expected ICCE exception not thrown");
+        } catch (IncompatibleClassChangeError e) {
+            if (!e.getMessage().contains("Conflicting default methods: I.m J.m")) {
+                throw new RuntimeException("Wrong ICCE exception thrown: " + e.getMessage());
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableLdrConstraint/I.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public interface I {
+    public Foo m();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableLdrConstraint/Task.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class Task implements Runnable {
+
+    public void run() {
+        Class<?> c = Foo.class; // forces PreemptingClassLoader to load Foo
+        C x = new C(); // triggers the loader constraint check
+        x.m();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/itableLdrConstraint/Test.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8186092
+ * @compile ../common/Foo.java
+ *          ../common/J.java
+ *          I.java
+ *          ../common/C.jasm
+ *          Task.java
+ *          ../common/PreemptingClassLoader.java
+ * @run main/othervm Test
+ */
+
+public class Test {
+
+    // Test that the error message is correct when a loader constraint error is
+    // detected during itable creation.
+    //
+    // In this test, during itable creation for class C, method "m()LFoo;" for
+    // C's super interface I has a different class Foo than the selected method's
+    // type, super interface J.  The selected method is neither an overpass method nor
+    // otherwise excluded from loader constraint checking.  So, a LinkageError
+    // exception should be thrown because the loader constraint check will fail.
+    public static void main(String... args) throws Exception {
+        Class<?> c = Foo.class; // forces standard class loader to load Foo
+        ClassLoader l = new PreemptingClassLoader("Task", "Foo", "C", "I");
+        Runnable r = (Runnable) l.loadClass("Task").newInstance();
+        try {
+            r.run();
+            throw new RuntimeException("Expected LinkageError exception not thrown");
+        } catch (LinkageError e) {
+            if (!e.getMessage().contains(
+                "loader constraint violation in interface itable initialization for class C:")) {
+                throw new RuntimeException("Wrong LinkageError exception thrown: " + e.getMessage());
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableAME/I.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public interface I extends J {
+    public Foo m();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableAME/Task.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class Task extends C { }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableAME/Test.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8186092
+ * @compile ../common/Foo.java
+ *          ../common/J.java
+ *          I.java
+ *          ../common/C.jasm
+ *          Task.java
+ *          ../common/PreemptingClassLoader.java
+ * @run main/othervm Test
+ */
+
+import java.io.PrintStream;
+import java.lang.reflect.*;
+
+public class Test {
+
+    // Test that LinkageError exceptions are not thrown during vtable creation,
+    // for loader constraint errors, if the target method is an overpass method.
+    //
+    // In this test, during vtable creation for class Task, the target method
+    // "Task.m()LFoo;" is an overpass method (that throws an AME).  So, even
+    // though it is inheriting the method from its super class C, and Task has
+    // a different class Foo than C, no LinkageError exception should be thrown
+    // because the loader constraint check that would cause the LinkageError
+    // should not be done.
+    public static void main(String args[]) throws Exception {
+        Class<?> c = Foo.class; // forces standard class loader to load Foo
+        ClassLoader l = new PreemptingClassLoader("Task", "Foo", "I", "J");
+        l.loadClass("Foo");
+        l.loadClass("Task").newInstance();
+        Task t = new Task();
+        try {
+            t.m(); // Should get AME
+            throw new RuntimeException("Missing AbstractMethodError exception");
+        } catch (AbstractMethodError e) {
+            if (!e.getMessage().contains("Method Task.m()LFoo; is abstract")) {
+                throw new RuntimeException("Wrong AME exception thrown: " + e.getMessage());
+            }
+        }
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableLdrConstraint/I.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public interface I extends J {
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableLdrConstraint/Task.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class Task extends C {
+
+    public Foo m() {
+        return null;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/LoaderConstraints/vtableLdrConstraint/Test.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8186092
+ * @compile ../common/Foo.java
+ *          ../common/J.java
+ *          I.java
+ *          ../common/C.jasm
+ *          Task.java
+ *          ../common/PreemptingClassLoader.java
+ * @run main/othervm Test
+ */
+
+public class Test {
+
+    // Test that the error message is correct when a loader constraint error is
+    // detected during vtable creation.
+    //
+    // In this test, during vtable creation for class Task, method "Task.m()LFoo;"
+    // overrides "J.m()LFoo;".  But, Task's class Foo and super type J's class Foo
+    // are different.  So, a LinkageError exception should be thrown because the
+    // loader constraint check will fail.
+    public static void main(String args[]) throws Exception {
+        Class<?> c = Foo.class; // forces standard class loader to load Foo
+        ClassLoader l = new PreemptingClassLoader("Task", "Foo", "I");
+        l.loadClass("Foo");
+        try {
+            l.loadClass("Task").newInstance();
+            throw new RuntimeException("Expected LinkageError exception not thrown");
+        } catch (LinkageError e) {
+            if (!e.getMessage().contains(
+                    "loader constraint violation for class Task: when selecting overriding method") ||
+                !e.getMessage().contains(
+                    "for its super type J have different Class objects for the type Foo")) {
+                throw new RuntimeException("Wrong LinkageError exception thrown: " + e.getMessage());
+            }
+        }
+    }
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/RedefineTests/RedefineDoubleDelete.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8178870
+ * @summary Redefine class with CFLH twice to test deleting the cached_class_file
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * @modules java.compiler
+ *          java.instrument
+ *          jdk.jartool/sun.tools.jar
+ * @run main RedefineClassHelper
+ * @run main/othervm/native -Xlog:redefine+class+load+exceptions -agentlib:RedefineDoubleDelete -javaagent:redefineagent.jar RedefineDoubleDelete
+ */
+
+public class RedefineDoubleDelete {
+
+    // Class gets a redefinition error because it adds a data member
+    public static String newB =
+                "class RedefineDoubleDelete$B {" +
+                "   int count1 = 0;" +
+                "}";
+
+    public static String newerB =
+                "class RedefineDoubleDelete$B { " +
+                "   int faa() { System.out.println(\"baa\"); return 2; }" +
+                "}";
+
+    // The ClassFileLoadHook for this class turns foo into faa and prints out faa.
+    static class B {
+      int faa() { System.out.println("foo"); return 1; }
+    }
+
+    public static void main(String args[]) throws Exception {
+
+        B b = new B();
+        int val = b.faa();
+        if (val != 1) {
+            throw new RuntimeException("return value wrong " + val);
+        }
+
+        // Redefine B twice to get cached_class_file in both B scratch classes
+        try {
+            RedefineClassHelper.redefineClass(B.class, newB);
+        } catch (java.lang.UnsupportedOperationException e) {
+            // this is expected
+        }
+        try {
+            RedefineClassHelper.redefineClass(B.class, newB);
+        } catch (java.lang.UnsupportedOperationException e) {
+            // this is expected
+        }
+
+        // Do a full GC.
+        System.gc();
+
+        // Redefine with a compatible class
+        RedefineClassHelper.redefineClass(B.class, newerB);
+        val = b.faa();
+        if (val != 2) {
+            throw new RuntimeException("return value wrong " + val);
+        }
+
+        // Do another full GC to clean things up.
+        System.gc();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/RedefineTests/libRedefineDoubleDelete.c	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "jvmti.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
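+// JNI_ENV_ARG and JNI_ENV_PTR hide the calling-convention difference between
+// the C and C++ forms of the JNI/JVMTI interfaces.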
+#ifndef JNI_ENV_ARG
+
+#ifdef __cplusplus
+#define JNI_ENV_ARG(x, y) y
+#define JNI_ENV_PTR(x) x
+#else
+#define JNI_ENV_ARG(x,y) x, y
+#define JNI_ENV_PTR(x) (*x)
+#endif
+
+#endif
+
+#define TranslateError(err) "JVMTI error"
+
+static jvmtiEnv *jvmti = NULL;
+
+static jint Agent_Initialize(JavaVM *jvm, char *options, void *reserved);
+
+JNIEXPORT
+jint JNICALL Agent_OnLoad(JavaVM *jvm, char *options, void *reserved) {
+    return Agent_Initialize(jvm, options, reserved);
+}
+
+JNIEXPORT
+jint JNICALL Agent_OnAttach(JavaVM *jvm, char *options, void *reserved) {
+    return Agent_Initialize(jvm, options, reserved);
+}
+
+JNIEXPORT
+jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    return JNI_VERSION_9;
+}
+
+
+static jint newClassDataLen = 0;
+static unsigned char* newClassData = NULL;
+
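+// Copy the incoming class bytes into JVMTI-allocated memory, rewriting every
+// "oo" to "aa" so that foo becomes faa (see RedefineDoubleDelete.java).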
+static jint
+getBytecodes(jvmtiEnv *jvmti_env,
+             jint class_data_len, const unsigned char* class_data) {
+    int i;
+    jint res;
+
+    newClassDataLen = class_data_len;
+    res = (*jvmti_env)->Allocate(jvmti_env, newClassDataLen, &newClassData);
+    if (res != JNI_OK) {
+        printf("    Unable to allocate bytes\n");
+        return JNI_ERR;
+    }
+    for (i = 0; i < newClassDataLen; i++) {
+        newClassData[i] = class_data[i];
+        // Rewrite oo in class to aa
+        if (i > 0 && class_data[i] == 'o' && class_data[i-1] == 'o') {
+            newClassData[i] = newClassData[i-1] = 'a';
+        }
+    }
+    printf("  ... copied bytecode: %d bytes\n", (int)newClassDataLen);
+    return JNI_OK;
+}
+
+
+static void JNICALL
+Callback_ClassFileLoadHook(jvmtiEnv *jvmti_env, JNIEnv *env,
+                           jclass class_being_redefined,
+                           jobject loader, const char* name, jobject protection_domain,
+                           jint class_data_len, const unsigned char* class_data,
+                           jint *new_class_data_len, unsigned char** new_class_data) {
+    if (name != NULL && strcmp(name, "RedefineDoubleDelete$B") == 0) {
+        if (newClassData == NULL) {
+            jint res = getBytecodes(jvmti_env, class_data_len, class_data);
+            if (res == JNI_ERR) {
+              printf(">>>    ClassFileLoadHook event: class name %s FAILED\n", name);
+              return;
+            }
+            // Only change for first CFLH event.
+            *new_class_data_len = newClassDataLen;
+            *new_class_data = newClassData;
+        }
+        printf(">>>    ClassFileLoadHook event: class name %s\n", name);
+    }
+}
+
+static
+jint Agent_Initialize(JavaVM *jvm, char *options, void *reserved) {
+    jint res, size;
+    jvmtiCapabilities caps;
+    jvmtiEventCallbacks callbacks;
+    jvmtiError err;
+
+    res = JNI_ENV_PTR(jvm)->GetEnv(JNI_ENV_ARG(jvm, (void **) &jvmti),
+        JVMTI_VERSION_9);
+    if (res != JNI_OK || jvmti == NULL) {
+        printf("    Error: wrong result of a valid call to GetEnv!\n");
+        return JNI_ERR;
+    }
+
+    printf("Enabling the following capabilities: can_generate_all_class_hook_events, "
+           "can_retransform_classes, can_redefine_classes");
+    memset(&caps, 0, sizeof(caps));
+    caps.can_generate_all_class_hook_events = 1;
+    caps.can_retransform_classes = 1;
+    caps.can_redefine_classes = 1;
+    printf("\n");
+
+    err = (*jvmti)->AddCapabilities(jvmti, &caps);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in AddCapabilities: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+
+    size = (jint)sizeof(callbacks);
+
+    memset(&callbacks, 0, sizeof(callbacks));
+    callbacks.ClassFileLoadHook = Callback_ClassFileLoadHook;
+
+    err = (*jvmti)->SetEventCallbacks(jvmti, &callbacks, size);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in SetEventCallbacks: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+
+    err = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in SetEventNotificationMode: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+
+    return JNI_OK;
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/test/hotspot/jtreg/runtime/SharedArchiveFile/SpaceUtilizationCheck.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/runtime/SharedArchiveFile/SpaceUtilizationCheck.java	Tue Oct 10 16:29:04 2017 +0200
@@ -64,7 +64,7 @@
     static void test(String... extra_options) throws Exception {
         OutputAnalyzer output = CDSTestUtils.createArchive(extra_options);
         CDSTestUtils.checkDump(output);
-        Pattern pattern = Pattern.compile("^(..) space: *([0-9]+).* out of *([0-9]+) bytes .* at 0x([0-9a0-f]+)");
+        Pattern pattern = Pattern.compile("^(..) *space: *([0-9]+).* out of *([0-9]+) bytes .* at 0x([0-9a0-f]+)");
         WhiteBox wb = WhiteBox.getWhiteBox();
         long reserve_alignment = wb.metaspaceReserveAlignment();
         System.out.println("Metaspace::reserve_alignment() = " + reserve_alignment);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/getSysPackage/GetPackageXbootclasspath.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8187436
+ * @summary Test that getPackage() works with a class loaded via -Xbootclasspath/a.
+ * @library /test/lib
+ * @run main/othervm GetPackageXbootclasspath
+ */
+
+// This is a regression test for a bug with the exploded build but should pass
+// when run with either the normal or exploded build.
+import jdk.test.lib.compiler.InMemoryJavaCompiler;
+import jdk.test.lib.process.ProcessTools;
+import jdk.test.lib.process.OutputAnalyzer;
+
+public class GetPackageXbootclasspath {
+
+    public static void main(String args[]) throws Exception {
+
+        String Test_src =
+            "package P; " +
+            "public class Test { " +
+                "public static void main(String[] args) throws Exception { " +
+                    "Package p = Test.class.getPackage(); " +
+                    "System.out.println(\"Test Passed\"); " +
+                "} " +
+            "}";
+
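+        // Compile P.Test in memory and write the class file under test.classes,
+        // which is appended to the boot class path below via -Xbootclasspath/a.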
+        String test_classes = System.getProperty("test.classes");
+        ClassFileInstaller.writeClassToDisk("P/Test",
+            InMemoryJavaCompiler.compile("P.Test", Test_src), test_classes);
+
+        new OutputAnalyzer(ProcessTools.createJavaProcessBuilder(
+                "-Xbootclasspath/a:" + test_classes, "P.Test")
+            .start()).shouldContain("Test Passed");
+    }
+}
--- a/test/hotspot/jtreg/runtime/modules/PatchModule/PatchModuleCDS.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/hotspot/jtreg/runtime/modules/PatchModule/PatchModuleCDS.java	Tue Oct 10 16:29:04 2017 +0200
@@ -50,7 +50,7 @@
             "-Xlog:class+path=info",
             "-version");
         new OutputAnalyzer(pb.start())
-            .shouldContain("ro space:"); // Make sure archive got created.
+            .shouldContain("ro  space:"); // Make sure archive got created.
 
         // Case 2: Test that directory in --patch-module is supported for CDS dumping
         // Create a class file in the module java.base.
@@ -73,7 +73,7 @@
             "-Xlog:class+path=info",
             "-version");
         new OutputAnalyzer(pb.start())
-            .shouldContain("ro space:"); // Make sure archive got created.
+            .shouldContain("ro  space:"); // Make sure archive got created.
 
         // Case 3a: Test CDS dumping with jar file in --patch-module
         BasicJarBuilder.build("javanaming", "javax/naming/spi/NamingManager");
@@ -87,7 +87,7 @@
             "-Xlog:class+path=info",
             "PatchModuleMain", "javax.naming.spi.NamingManager");
         new OutputAnalyzer(pb.start())
-            .shouldContain("ro space:"); // Make sure archive got created.
+            .shouldContain("ro  space:"); // Make sure archive got created.
 
         // Case 3b: Test CDS run with jar file in --patch-module
         pb = ProcessTools.createJavaProcessBuilder(
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/serviceability/sa/TestRevPtrsForInvokeDynamic.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import sun.jvm.hotspot.HotSpotAgent;
+import sun.jvm.hotspot.utilities.ReversePtrsAnalysis;
+
+import jdk.test.lib.apps.LingeredApp;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.JDKToolFinder;
+import jdk.test.lib.JDKToolLauncher;
+import jdk.test.lib.Platform;
+import jdk.test.lib.process.OutputAnalyzer;
+import jdk.test.lib.process.ProcessTools;
+import jdk.test.lib.Utils;
+
+/*
+ * @test
+ * @library /test/lib
+ * @requires os.family != "mac"
+ * @modules java.base/jdk.internal.misc
+ *          jdk.hotspot.agent/sun.jvm.hotspot
+ *          jdk.hotspot.agent/sun.jvm.hotspot.utilities
+ * @run main/othervm TestRevPtrsForInvokeDynamic
+ */
+
+public class TestRevPtrsForInvokeDynamic {
+
+    private static LingeredAppWithInvokeDynamic theApp = null;
+
+    private static void computeReversePointers(String pid) throws Exception {
+        HotSpotAgent agent = new HotSpotAgent();
+
+        try {
+            agent.attach(Integer.parseInt(pid));
+            ReversePtrsAnalysis analysis = new ReversePtrsAnalysis();
+            analysis.run();
+        } finally {
+            agent.detach();
+        }
+    }
+
+    private static void createAnotherToAttach(long lingeredAppPid)
+                                                         throws Exception {
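+        // Relaunch this test class with the target pid as its argument; the
+        // child process then takes the computeReversePointers() path in main().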
+        String[] toolArgs = {
+            "--add-modules=jdk.hotspot.agent",
+            "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot=ALL-UNNAMED",
+            "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot.utilities=ALL-UNNAMED",
+            "TestRevPtrsForInvokeDynamic",
+            Long.toString(lingeredAppPid)
+        };
+
+        // Start a new process to attach to the lingered app
+        ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder(toolArgs);
+        OutputAnalyzer SAOutput = ProcessTools.executeProcess(processBuilder);
+        SAOutput.shouldHaveExitValue(0);
+        System.out.println(SAOutput.getOutput());
+    }
+
+    public static void main (String... args) throws Exception {
+        if (!Platform.shouldSAAttach()) {
+            System.out.println(
+               "SA attach not expected to work - test skipped.");
+            return;
+        }
+
+        if (args == null || args.length == 0) {
+            try {
+                List<String> vmArgs = new ArrayList<String>();
+                vmArgs.add("-XX:+UsePerfData");
+                vmArgs.addAll(Utils.getVmOptions());
+
+                theApp = new LingeredAppWithInvokeDynamic();
+                LingeredApp.startApp(vmArgs, theApp);
+                createAnotherToAttach(theApp.getPid());
+            } finally {
+                LingeredApp.stopApp(theApp);
+            }
+        } else {
+            computeReversePointers(args[0]);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/management/ThreadMXBean/MaxDepthForThreadInfoTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8185003
+ * @build ThreadDump
+ * @run main MaxDepthForThreadInfoTest
+ * @summary verifies the functionality of ThreadMXBean.dumpAllThreads
+ * and ThreadMXBean.getThreadInfo with maxDepth argument
+ */
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
+
+
+
+public class MaxDepthForThreadInfoTest {
+
+
+    public static void main(String[] args) {
+
+        ThreadMXBean tmxb = ManagementFactory.getThreadMXBean();
+
+        long[] threadIds = tmxb.getAllThreadIds();
+
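+        // maxDepth == 0: no stack frames should be reported for any thread.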
+        ThreadInfo[] tinfos = tmxb.getThreadInfo(threadIds, true, true, 0);
+        for (ThreadInfo ti : tinfos) {
+            if (ti.getStackTrace().length > 0) {
+                ThreadDump.printThreadInfo(ti);
+                throw new RuntimeException("more than the requested " +
+                        "number of frames dumped");
+            }
+        }
+
+        tinfos = tmxb.getThreadInfo(threadIds, true, true, 3);
+        for (ThreadInfo ti : tinfos) {
+            if (ti.getStackTrace().length > 3) {
+                ThreadDump.printThreadInfo(ti);
+                throw new RuntimeException("more than the requested " +
+                        "number of frames dumped");
+            }
+        }
+
+        try {
+            tmxb.getThreadInfo(threadIds, true, true, -1);
+            throw new RuntimeException("Didn't throw IllegalArgumentException " +
+                    "for negative maxDepth value");
+        } catch (IllegalArgumentException e) {
+            System.out.println("Threw IllegalArgumentException as expected");
+        }
+
+        tinfos = tmxb.dumpAllThreads(true, true, 0);
+        for (ThreadInfo ti : tinfos) {
+            if (ti.getStackTrace().length > 0) {
+                ThreadDump.printThreadInfo(ti);
+                throw new RuntimeException("more than the requested " +
+                        "number of frames dumped");
+            }
+        }
+        tinfos = tmxb.dumpAllThreads(true, true, 2);
+        for (ThreadInfo ti : tinfos) {
+            if (ti.getStackTrace().length > 2) {
+                ThreadDump.printThreadInfo(ti);
+                throw new RuntimeException("more than the requested " +
+                        "number of frames dumped");
+            }
+        }
+
+        try {
+            tmxb.dumpAllThreads(true, true, -1);
+            throw new RuntimeException("Didn't throw IllegalArgumentException " +
+                    "for negative maxDepth value");
+        } catch (IllegalArgumentException e) {
+            System.out.println("Threw IllegalArgumentException as expected");
+        }
+
+        System.out.println("Test passed");
+    }
+}
--- a/test/jdk/jdk/modules/etc/JdkQualifiedExportTest.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/jdk/jdk/modules/etc/JdkQualifiedExportTest.java	Tue Oct 10 16:29:04 2017 +0200
@@ -70,6 +70,7 @@
 
     static Set<String> KNOWN_EXCEPTIONS =
         Set.of("jdk.internal.vm.ci/jdk.vm.ci.services",
+               "jdk.internal.vm.ci/jdk.vm.ci.runtime",
                "jdk.jsobject/jdk.internal.netscape.javascript.spi");
 
     static void checkExports(ModuleDescriptor md) {
--- a/test/jtreg-ext/requires/VMProps.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/jtreg-ext/requires/VMProps.java	Tue Oct 10 16:29:04 2017 +0200
@@ -73,6 +73,8 @@
         map.put("vm.aot", vmAOT());
         // vm.cds is true if the VM is compiled with cds support.
         map.put("vm.cds", vmCDS());
+        // vm.graal.enabled is true if Graal is used as JIT
+        map.put("vm.graal.enabled", isGraalEnabled());
         vmGC(map); // vm.gc.X = true/false
 
         VMProps.dump(map);
@@ -293,6 +295,41 @@
     }
 
     /**
+     * Check if Graal is used as the JIT compiler.
+     *
+     * @return true if Graal is used as the JIT compiler.
+     */
+    protected String isGraalEnabled() {
+        // Graal is enabled if the following conditions are true:
+        // - we are not in Interpreter mode
+        // - UseJVMCICompiler flag is true
+        // - jvmci.Compiler variable is equal to 'graal'
+        // - TieredCompilation is not used or TieredStopAtLevel is greater than 3
+
+        Boolean useCompiler = WB.getBooleanVMFlag("UseCompiler");
+        if (useCompiler == null || !useCompiler)
+            return "false";
+
+        Boolean useJvmciComp = WB.getBooleanVMFlag("UseJVMCICompiler");
+        if (useJvmciComp == null || !useJvmciComp)
+            return "false";
+
+        // This check might be redundant but let's keep it for now.
+        String jvmciCompiler = System.getProperty("jvmci.Compiler");
+        if (jvmciCompiler == null || !jvmciCompiler.equals("graal")) {
+            return "false";
+        }
+
+        Boolean tieredCompilation = WB.getBooleanVMFlag("TieredCompilation");
+        Long compLevel = WB.getIntxVMFlag("TieredStopAtLevel");
+        // If TieredCompilation is enabled and the compilation level is <= 3, Graal is not used.
+        if (tieredCompilation != null && tieredCompilation && compLevel != null && compLevel <= 3)
+            return "false";
+
+        return "true";
+    }
+
+    /**
      * Dumps the map to the file if the file name is given as the property.
      * This functionality could be helpful to know context in the real
      * execution.
--- a/test/lib/jdk/test/lib/FileInstaller.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/lib/jdk/test/lib/FileInstaller.java	Tue Oct 10 16:29:04 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,11 +32,18 @@
 import java.nio.file.StandardCopyOption;
 import java.nio.file.attribute.BasicFileAttributes;
 
+// !!!
+// NOTE: this class is widely used. DO NOT depend on any other classes in any test library, or else
+// you may see intermittent ClassNotFoundException as in JDK-8188828
+// !!!
+
 /**
  * Copy a resource: file or directory recursively, using relative path(src and dst)
  * which are applied to test source directory(src) and current directory(dst)
  */
 public class FileInstaller {
+    public static final String TEST_SRC = System.getProperty("test.src", "").trim();
+
     /**
      * @param args source and destination
      * @throws IOException if an I/O error occurs
@@ -45,10 +52,12 @@
         if (args.length != 2) {
             throw new IllegalArgumentException("Unexpected number of arguments for file copy");
         }
-        Path src = Paths.get(Utils.TEST_SRC, args[0]).toAbsolutePath();
-        Path dst = Paths.get(args[1]).toAbsolutePath();
+        Path src = Paths.get(TEST_SRC, args[0]).toAbsolutePath().normalize();
+        Path dst = Paths.get(args[1]).toAbsolutePath().normalize();
         if (src.toFile().exists()) {
+            System.out.printf("copying %s to %s%n", src, dst);
             if (src.toFile().isDirectory()) {
+                // can't use Files::copy for dirs, as 'dst' might exist already
                 Files.walkFileTree(src, new CopyFileVisitor(src, dst));
             } else {
                 Path dstDir = dst.getParent();
@@ -74,7 +83,7 @@
         @Override
         public FileVisitResult preVisitDirectory(Path file,
                 BasicFileAttributes attrs) throws IOException {
-            Path relativePath = file.relativize(copyFrom);
+            Path relativePath = copyFrom.relativize(file);
             Path destination = copyTo.resolve(relativePath);
             if (!destination.toFile().exists()) {
                 Files.createDirectories(destination);
--- a/test/lib/jdk/test/lib/cds/CDSTestUtils.java	Thu Oct 05 18:29:47 2017 +0100
+++ b/test/lib/jdk/test/lib/cds/CDSTestUtils.java	Tue Oct 10 16:29:04 2017 +0200
@@ -26,7 +26,9 @@
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.PrintStream;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Date;
 import jdk.test.lib.Utils;
 import jdk.test.lib.process.OutputAnalyzer;
 import jdk.test.lib.process.ProcessTools;
@@ -60,6 +62,8 @@
     public static OutputAnalyzer createArchive(CDSOptions opts)
         throws Exception {
 
+        startNewArchiveName();
+
         ArrayList<String> cmd = new ArrayList<String>();
 
         for (String p : opts.prefix) cmd.add(p);
@@ -328,9 +332,19 @@
         return testName;
     }
 
+    private static final SimpleDateFormat timeStampFormat =
+        new SimpleDateFormat("HH'h'mm'm'ss's'SSS");
+
+    private static String defaultArchiveName;
+
+    // Call this method to start a new archive with a new unique name
+    public static void startNewArchiveName() {
+        defaultArchiveName = getTestName() +
+            timeStampFormat.format(new Date()) + ".jsa";
+    }
 
     public static String getDefaultArchiveName() {
-        return getTestName() + ".jsa";
+        return defaultArchiveName;
     }