src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/NewObjectSnippets.java
branchdatagramsocketimpl-branch
changeset 58678 9cf78a70fa4f
parent 54724 62f373a53296
child 58679 9c3209ff7550
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/NewObjectSnippets.java	Thu Oct 17 20:27:44 2019 +0100
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/NewObjectSnippets.java	Thu Oct 17 20:53:35 2019 +0100
@@ -24,6 +24,7 @@
 
 package org.graalvm.compiler.hotspot.replacements;
 
+import static jdk.vm.ci.meta.DeoptimizationAction.InvalidateRecompile;
 import static jdk.vm.ci.meta.DeoptimizationAction.None;
 import static jdk.vm.ci.meta.DeoptimizationReason.RuntimeConstraint;
 import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC;
@@ -38,6 +39,7 @@
 import static org.graalvm.compiler.hotspot.HotSpotBackend.NEW_MULTI_ARRAY;
 import static org.graalvm.compiler.hotspot.HotSpotBackend.NEW_MULTI_ARRAY_OR_NULL;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.CLASS_ARRAY_KLASS_LOCATION;
+import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.CLASS_INIT_STATE_LOCATION;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.HUB_WRITE_LOCATION;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.MARK_WORD_LOCATION;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.PROTOTYPE_MARK_WORD_LOCATION;
@@ -53,6 +55,7 @@
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.arrayLengthOffset;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.initializeObjectHeader;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.instanceHeaderSize;
+import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.instanceKlassStateBeingInitialized;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.isInstanceKlassFullyInitialized;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.layoutHelperHeaderSizeMask;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.layoutHelperHeaderSizeShift;
@@ -60,6 +63,8 @@
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.layoutHelperLog2ElementSizeShift;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.loadKlassFromObject;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.prototypeMarkWordOffset;
+import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.readInstanceKlassInitState;
+import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.readInstanceKlassInitThread;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.readLayoutHelper;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.readTlabEnd;
 import static org.graalvm.compiler.hotspot.replacements.HotSpotReplacementsUtil.readTlabTop;
@@ -72,9 +77,11 @@
 import static org.graalvm.compiler.hotspot.replacements.HotspotSnippetsOptions.ProfileAllocationsContext;
 import static org.graalvm.compiler.nodes.PiArrayNode.piArrayCastToSnippetReplaceeStamp;
 import static org.graalvm.compiler.nodes.PiNode.piCastToSnippetReplaceeStamp;
+import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.DEOPT_PROBABILITY;
 import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.FAST_PATH_PROBABILITY;
 import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.FREQUENT_PROBABILITY;
 import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.SLOW_PATH_PROBABILITY;
+import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.VERY_FAST_PATH_PROBABILITY;
 import static org.graalvm.compiler.nodes.extended.BranchProbabilityNode.probability;
 import static org.graalvm.compiler.replacements.ReplacementsUtil.REPLACEMENTS_ASSERTIONS_ENABLED;
 import static org.graalvm.compiler.replacements.ReplacementsUtil.runtimeAssert;
@@ -98,6 +105,7 @@
 import org.graalvm.compiler.hotspot.meta.HotSpotProviders;
 import org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider;
 import org.graalvm.compiler.hotspot.nodes.DimensionsNode;
+import org.graalvm.compiler.hotspot.nodes.KlassBeingInitializedCheckNode;
 import org.graalvm.compiler.hotspot.nodes.aot.LoadConstantIndirectlyFixedNode;
 import org.graalvm.compiler.hotspot.nodes.aot.LoadConstantIndirectlyNode;
 import org.graalvm.compiler.hotspot.nodes.type.KlassPointerStamp;
@@ -111,6 +119,7 @@
 import org.graalvm.compiler.nodes.ValueNode;
 import org.graalvm.compiler.nodes.debug.DynamicCounterNode;
 import org.graalvm.compiler.nodes.debug.VerifyHeapNode;
+import org.graalvm.compiler.nodes.extended.BranchProbabilityNode;
 import org.graalvm.compiler.nodes.extended.ForeignCallNode;
 import org.graalvm.compiler.nodes.extended.MembarNode;
 import org.graalvm.compiler.nodes.java.DynamicNewArrayNode;
@@ -211,14 +220,27 @@
     }
 
     @Snippet
-    public static Object allocateInstance(@ConstantParameter long size, KlassPointer hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents,
-                    @ConstantParameter Register threadRegister, @ConstantParameter boolean constantSize, @ConstantParameter String typeContext,
+    public static Object allocateInstance(@ConstantParameter long size,
+                    KlassPointer hub,
+                    Word prototypeMarkWord,
+                    @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
+                    @ConstantParameter Register threadRegister,
+                    @ConstantParameter boolean constantSize,
+                    @ConstantParameter String typeContext,
                     @ConstantParameter Counters counters) {
-        return piCastToSnippetReplaceeStamp(allocateInstanceHelper(size, hub, prototypeMarkWord, fillContents, threadRegister, constantSize, typeContext, counters));
+        return piCastToSnippetReplaceeStamp(allocateInstanceHelper(size, hub, prototypeMarkWord, fillContents, emitMemoryBarrier, threadRegister, constantSize, typeContext, counters));
     }
 
-    public static Object allocateInstanceHelper(long size, KlassPointer hub, Word prototypeMarkWord, boolean fillContents,
-                    Register threadRegister, boolean constantSize, String typeContext, Counters counters) {
+    public static Object allocateInstanceHelper(long size,
+                    KlassPointer hub,
+                    Word prototypeMarkWord,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    Register threadRegister,
+                    boolean constantSize,
+                    String typeContext,
+                    Counters counters) {
         Object result;
         Word thread = registerAsWord(threadRegister);
         Word top = readTlabTop(thread);
@@ -227,7 +249,7 @@
         if (useTLAB(INJECTED_VMCONFIG) && probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) {
             writeTlabTop(thread, newTop);
             emitPrefetchAllocate(newTop, false);
-            result = formatObject(hub, size, top, prototypeMarkWord, fillContents, constantSize, counters);
+            result = formatObject(hub, size, top, prototypeMarkWord, fillContents, emitMemoryBarrier, constantSize, counters);
         } else {
             Counters theCounters = counters;
             if (theCounters != null && theCounters.stub != null) {
@@ -254,37 +276,51 @@
     private static native Object newInstanceOrNull(@ConstantNodeParameter ForeignCallDescriptor descriptor, KlassPointer hub);
 
     @Snippet
-    public static Object allocateInstancePIC(@ConstantParameter long size, KlassPointer hub, Word prototypeMarkWord, @ConstantParameter boolean fillContents,
-                    @ConstantParameter Register threadRegister, @ConstantParameter boolean constantSize, @ConstantParameter String typeContext,
+    public static Object allocateInstancePIC(@ConstantParameter long size,
+                    KlassPointer hub,
+                    Word prototypeMarkWord,
+                    @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
+                    @ConstantParameter Register threadRegister,
+                    @ConstantParameter boolean constantSize,
+                    @ConstantParameter String typeContext,
                     @ConstantParameter Counters counters) {
         // Klass must be initialized by the time the first instance is allocated, therefore we can
         // just load it from the corresponding cell and avoid the resolution check. We have to use a
         // fixed load though, to prevent it from floating above the initialization.
         KlassPointer picHub = LoadConstantIndirectlyFixedNode.loadKlass(hub);
-        return piCastToSnippetReplaceeStamp(allocateInstanceHelper(size, picHub, prototypeMarkWord, fillContents, threadRegister, constantSize, typeContext, counters));
+        return piCastToSnippetReplaceeStamp(allocateInstanceHelper(size, picHub, prototypeMarkWord, fillContents, emitMemoryBarrier, threadRegister, constantSize, typeContext, counters));
     }
 
     @Snippet
-    public static Object allocateInstanceDynamic(Class<?> type, Class<?> classClass, @ConstantParameter boolean fillContents, @ConstantParameter Register threadRegister,
+    public static Object allocateInstanceDynamic(Class<?> type, Class<?> classClass,
+                    @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
+                    @ConstantParameter Register threadRegister,
                     @ConstantParameter Counters counters) {
-        if (probability(SLOW_PATH_PROBABILITY, type == null)) {
+        if (probability(DEOPT_PROBABILITY, type == null)) {
             DeoptimizeNode.deopt(None, RuntimeConstraint);
         }
         Class<?> nonNullType = PiNode.piCastNonNullClass(type, SnippetAnchorNode.anchor());
 
-        if (probability(SLOW_PATH_PROBABILITY, DynamicNewInstanceNode.throwsInstantiationException(type, classClass))) {
+        if (probability(DEOPT_PROBABILITY, DynamicNewInstanceNode.throwsInstantiationException(type, classClass))) {
             DeoptimizeNode.deopt(None, RuntimeConstraint);
         }
 
-        return PiNode.piCastToSnippetReplaceeStamp(allocateInstanceDynamicHelper(type, fillContents, threadRegister, counters, nonNullType));
+        return PiNode.piCastToSnippetReplaceeStamp(allocateInstanceDynamicHelper(type, fillContents, emitMemoryBarrier, threadRegister, counters, nonNullType));
     }
 
-    private static Object allocateInstanceDynamicHelper(Class<?> type, boolean fillContents, Register threadRegister, Counters counters, Class<?> nonNullType) {
+    private static Object allocateInstanceDynamicHelper(Class<?> type,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    Register threadRegister,
+                    Counters counters,
+                    Class<?> nonNullType) {
         KlassPointer hub = ClassGetHubNode.readClass(nonNullType);
         if (probability(FAST_PATH_PROBABILITY, !hub.isNull())) {
             KlassPointer nonNullHub = ClassGetHubNode.piCastNonNull(hub, SnippetAnchorNode.anchor());
 
-            if (probability(FAST_PATH_PROBABILITY, isInstanceKlassFullyInitialized(nonNullHub))) {
+            if (probability(VERY_FAST_PATH_PROBABILITY, isInstanceKlassFullyInitialized(nonNullHub))) {
                 int layoutHelper = readLayoutHelper(nonNullHub);
                 /*
                  * src/share/vm/oops/klass.hpp: For instances, layout helper is a positive number,
@@ -298,7 +334,7 @@
                      * FIXME(je,ds): we should actually pass typeContext instead of "" but late
                      * binding of parameters is not yet supported by the GraphBuilderPlugin system.
                      */
-                    return allocateInstanceHelper(layoutHelper, nonNullHub, prototypeMarkWord, fillContents, threadRegister, false, "", counters);
+                    return allocateInstanceHelper(layoutHelper, nonNullHub, prototypeMarkWord, fillContents, emitMemoryBarrier, threadRegister, false, "", counters);
                 }
             } else {
                 DeoptimizeNode.deopt(None, RuntimeConstraint);
@@ -319,15 +355,16 @@
                     @ConstantParameter int headerSize,
                     @ConstantParameter int log2ElementSize,
                     @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
                     @ConstantParameter Register threadRegister,
                     @ConstantParameter boolean maybeUnroll,
                     @ConstantParameter String typeContext,
-                    @ConstantParameter boolean useBulkZeroing,
+                    @ConstantParameter boolean supportsBulkZeroing,
                     @ConstantParameter Counters counters) {
         // Primitive array types are eagerly pre-resolved. We can use a floating load.
         KlassPointer picHub = LoadConstantIndirectlyNode.loadKlass(hub);
         return allocateArrayImpl(picHub, length, prototypeMarkWord, headerSize, log2ElementSize, fillContents,
-                        threadRegister, maybeUnroll, typeContext, useBulkZeroing, counters);
+                        emitMemoryBarrier, threadRegister, maybeUnroll, typeContext, supportsBulkZeroing, counters);
     }
 
     @Snippet
@@ -337,15 +374,16 @@
                     @ConstantParameter int headerSize,
                     @ConstantParameter int log2ElementSize,
                     @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
                     @ConstantParameter Register threadRegister,
                     @ConstantParameter boolean maybeUnroll,
                     @ConstantParameter String typeContext,
-                    @ConstantParameter boolean useBulkZeroing,
+                    @ConstantParameter boolean supportsBulkZeroing,
                     @ConstantParameter Counters counters) {
         // Array type would be resolved by dominating resolution.
         KlassPointer picHub = LoadConstantIndirectlyFixedNode.loadKlass(hub);
         return allocateArrayImpl(picHub, length, prototypeMarkWord, headerSize, log2ElementSize, fillContents,
-                        threadRegister, maybeUnroll, typeContext, useBulkZeroing, counters);
+                        emitMemoryBarrier, threadRegister, maybeUnroll, typeContext, supportsBulkZeroing, counters);
     }
 
     @Snippet
@@ -355,10 +393,11 @@
                     @ConstantParameter int headerSize,
                     @ConstantParameter int log2ElementSize,
                     @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
                     @ConstantParameter Register threadRegister,
                     @ConstantParameter boolean maybeUnroll,
                     @ConstantParameter String typeContext,
-                    @ConstantParameter boolean useBulkZeroing,
+                    @ConstantParameter boolean supportsBulkZeroing,
                     @ConstantParameter Counters counters) {
         Object result = allocateArrayImpl(hub,
                         length,
@@ -366,10 +405,10 @@
                         headerSize,
                         log2ElementSize,
                         fillContents,
-                        threadRegister,
+                        emitMemoryBarrier, threadRegister,
                         maybeUnroll,
                         typeContext,
-                        useBulkZeroing,
+                        supportsBulkZeroing,
                         counters);
         return piArrayCastToSnippetReplaceeStamp(verifyOop(result), length);
     }
@@ -383,8 +422,18 @@
         return config.areNullAllocationStubsAvailable();
     }
 
-    private static Object allocateArrayImpl(KlassPointer hub, int length, Word prototypeMarkWord, int headerSize, int log2ElementSize, boolean fillContents, Register threadRegister,
-                    boolean maybeUnroll, String typeContext, boolean useBulkZeroing, Counters counters) {
+    private static Object allocateArrayImpl(KlassPointer hub,
+                    int length,
+                    Word prototypeMarkWord,
+                    int headerSize,
+                    int log2ElementSize,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    Register threadRegister,
+                    boolean maybeUnroll,
+                    String typeContext,
+                    boolean supportsBulkZeroing,
+                    Counters counters) {
         Object result;
         long allocationSize = arrayAllocationSize(length, headerSize, log2ElementSize);
         Word thread = registerAsWord(threadRegister);
@@ -399,7 +448,7 @@
             if (theCounters != null && theCounters.arrayLoopInit != null) {
                 theCounters.arrayLoopInit.inc();
             }
-            result = formatArray(hub, allocationSize, length, headerSize, top, prototypeMarkWord, fillContents, maybeUnroll, useBulkZeroing, counters);
+            result = formatArray(hub, allocationSize, length, headerSize, top, prototypeMarkWord, fillContents, emitMemoryBarrier, maybeUnroll, supportsBulkZeroing, counters);
         } else {
             result = newArrayStub(hub, length);
         }
@@ -443,7 +492,7 @@
      * Deoptimizes if {@code obj == null} otherwise returns {@code obj}.
      */
     private static Object nonNullOrDeopt(Object obj) {
-        if (obj == null) {
+        if (BranchProbabilityNode.probability(BranchProbabilityNode.DEOPT_PROBABILITY, obj == null)) {
             DeoptimizeNode.deopt(None, RuntimeConstraint);
         }
         return obj;
@@ -460,19 +509,29 @@
                     Class<?> voidClass,
                     int length,
                     @ConstantParameter boolean fillContents,
+                    @ConstantParameter boolean emitMemoryBarrier,
                     @ConstantParameter Register threadRegister,
                     @ConstantParameter JavaKind knownElementKind,
                     @ConstantParameter int knownLayoutHelper,
-                    @ConstantParameter boolean useBulkZeroing,
+                    @ConstantParameter boolean supportsBulkZeroing,
                     Word prototypeMarkWord,
                     @ConstantParameter Counters counters) {
-        Object result = allocateArrayDynamicImpl(elementType, voidClass, length, fillContents, threadRegister, knownElementKind,
-                        knownLayoutHelper, useBulkZeroing, prototypeMarkWord, counters);
+        Object result = allocateArrayDynamicImpl(elementType, voidClass, length, fillContents, emitMemoryBarrier, threadRegister, knownElementKind,
+                        knownLayoutHelper, supportsBulkZeroing, prototypeMarkWord, counters);
         return result;
     }
 
-    private static Object allocateArrayDynamicImpl(Class<?> elementType, Class<?> voidClass, int length, boolean fillContents, Register threadRegister, JavaKind knownElementKind,
-                    int knownLayoutHelper, boolean useBulkZeroing, Word prototypeMarkWord, Counters counters) {
+    private static Object allocateArrayDynamicImpl(Class<?> elementType,
+                    Class<?> voidClass,
+                    int length,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    Register threadRegister,
+                    JavaKind knownElementKind,
+                    int knownLayoutHelper,
+                    boolean supportsBulkZeroing,
+                    Word prototypeMarkWord,
+                    Counters counters) {
         /*
          * We only need the dynamic check for void when we have no static information from
          * knownElementKind.
@@ -483,12 +542,12 @@
         }
 
         KlassPointer klass = loadKlassFromObject(elementType, arrayKlassOffset(INJECTED_VMCONFIG), CLASS_ARRAY_KLASS_LOCATION);
-        if (klass.isNull()) {
+        if (probability(DEOPT_PROBABILITY, klass.isNull())) {
             DeoptimizeNode.deopt(None, RuntimeConstraint);
         }
         KlassPointer nonNullKlass = ClassGetHubNode.piCastNonNull(klass, SnippetAnchorNode.anchor());
 
-        if (length < 0) {
+        if (probability(DEOPT_PROBABILITY, length < 0)) {
             DeoptimizeNode.deopt(None, RuntimeConstraint);
         }
         int layoutHelper;
@@ -515,7 +574,7 @@
         int log2ElementSize = (layoutHelper >> layoutHelperLog2ElementSizeShift(INJECTED_VMCONFIG)) & layoutHelperLog2ElementSizeMask(INJECTED_VMCONFIG);
 
         Object result = allocateArrayImpl(nonNullKlass, length, prototypeMarkWord, headerSize, log2ElementSize, fillContents,
-                        threadRegister, false, "dynamic type", useBulkZeroing, counters);
+                        emitMemoryBarrier, threadRegister, false, "dynamic type", supportsBulkZeroing, counters);
         return piArrayCastToSnippetReplaceeStamp(verifyOop(result), length);
     }
 
@@ -563,21 +622,21 @@
      * Zero uninitialized memory in a newly allocated object, unrolling as necessary and ensuring
      * that stores are aligned.
      *
-     * @param size number of bytes to zero
      * @param memory beginning of object which is being zeroed
-     * @param constantSize is {@code size} known to be constant in the snippet
-     * @param startOffset offset to begin zeroing. May not be word aligned.
+     * @param startOffset offset to begin zeroing (inclusive). May not be word aligned.
+     * @param endOffset offset to stop zeroing (exclusive). May not be word aligned.
+     * @param isEndOffsetConstant is {@code endOffset} known to be constant in the snippet
      * @param manualUnroll maximally unroll zeroing
-     * @param useBulkZeroing apply bulk zeroing
+     * @param supportsBulkZeroing whether bulk zeroing is supported by the backend
      */
-    private static void zeroMemory(long size, Word memory, boolean constantSize, int startOffset, boolean manualUnroll,
-                    boolean useBulkZeroing, Counters counters) {
-        fillMemory(0, size, memory, constantSize, startOffset, manualUnroll, useBulkZeroing, counters);
+    private static void zeroMemory(Word memory, int startOffset, long endOffset, boolean isEndOffsetConstant, boolean manualUnroll,
+                    boolean supportsBulkZeroing, Counters counters) {
+        fillMemory(0, memory, startOffset, endOffset, isEndOffsetConstant, manualUnroll, supportsBulkZeroing, counters);
     }
 
-    private static void fillMemory(long value, long size, Word memory, boolean constantSize, int startOffset, boolean manualUnroll,
-                    boolean useBulkZeroing, Counters counters) {
-        ReplacementsUtil.runtimeAssert((size & 0x7) == 0, "unaligned object size");
+    private static void fillMemory(long value, Word memory, int startOffset, long endOffset, boolean constantOffsetLimit, boolean manualUnroll,
+                    boolean supportsBulkZeroing, Counters counters) {
+        ReplacementsUtil.runtimeAssert((endOffset & 0x7) == 0, "unaligned object size");
         int offset = startOffset;
         if ((offset & 0x7) != 0) {
             memory.writeInt(offset, (int) value, LocationIdentity.init());
@@ -585,8 +644,8 @@
         }
         ReplacementsUtil.runtimeAssert((offset & 0x7) == 0, "unaligned offset");
         Counters theCounters = counters;
-        if (manualUnroll && ((size - offset) / 8) <= MAX_UNROLLED_OBJECT_ZEROING_STORES) {
-            ReplacementsUtil.staticAssert(!constantSize, "size shouldn't be constant at instantiation time");
+        if (manualUnroll && ((endOffset - offset) / 8) <= MAX_UNROLLED_OBJECT_ZEROING_STORES) {
+            ReplacementsUtil.staticAssert(!constantOffsetLimit, "size shouldn't be constant at instantiation time");
             // This case handles arrays of constant length. Instead of having a snippet variant for
             // each length, generate a chain of stores of maximum length. Once it's inlined the
             // break statement will trim excess stores.
@@ -596,7 +655,7 @@
 
             explodeLoop();
             for (int i = 0; i < MAX_UNROLLED_OBJECT_ZEROING_STORES; i++, offset += 8) {
-                if (offset == size) {
+                if (offset == endOffset) {
                     break;
                 }
                 memory.initializeLong(offset, value, LocationIdentity.init());
@@ -604,13 +663,13 @@
         } else {
             // Use Word instead of int to avoid extension to long in generated code
             Word off = WordFactory.signed(offset);
-            if (useBulkZeroing && probability(SLOW_PATH_PROBABILITY, size >= getMinimalBulkZeroingSize(INJECTED_OPTIONVALUES))) {
+            if (supportsBulkZeroing && value == 0 && probability(SLOW_PATH_PROBABILITY, (endOffset - offset) >= getMinimalBulkZeroingSize(INJECTED_OPTIONVALUES))) {
                 if (theCounters != null && theCounters.instanceBulkInit != null) {
                     theCounters.instanceBulkInit.inc();
                 }
-                ZeroMemoryNode.zero(memory.add(off), size - offset, LocationIdentity.init());
+                ZeroMemoryNode.zero(memory.add(off), endOffset - offset, true, LocationIdentity.init());
             } else {
-                if (constantSize && ((size - offset) / 8) <= MAX_UNROLLED_OBJECT_ZEROING_STORES) {
+                if (constantOffsetLimit && ((endOffset - offset) / 8) <= MAX_UNROLLED_OBJECT_ZEROING_STORES) {
                     if (theCounters != null && theCounters.instanceSeqInit != null) {
                         theCounters.instanceSeqInit.inc();
                     }
@@ -620,10 +679,9 @@
                         theCounters.instanceLoopInit.inc();
                     }
                 }
-                for (; off.rawValue() < size; off = off.add(8)) {
+                for (; off.rawValue() < endOffset; off = off.add(8)) {
                     memory.initializeLong(off, value, LocationIdentity.init());
                 }
-
             }
         }
     }
@@ -637,28 +695,38 @@
      * Fill uninitialized memory with garbage value in a newly allocated object, unrolling as
      * necessary and ensuring that stores are aligned.
      *
-     * @param size number of bytes to zero
      * @param memory beginning of object which is being zeroed
-     * @param constantSize is {@code  size} known to be constant in the snippet
-     * @param startOffset offset to begin zeroing. May not be word aligned.
+     * @param startOffset offset to begin filling garbage value (inclusive). May not be word
+     *            aligned.
+     * @param endOffset offset to stop filling garbage value (exclusive). May not be word aligned.
+     * @param isEndOffsetConstant is {@code  endOffset} known to be constant in the snippet
      * @param manualUnroll maximally unroll zeroing
      */
-    private static void fillWithGarbage(long size, Word memory, boolean constantSize, int startOffset, boolean manualUnroll, Counters counters) {
-        fillMemory(0xfefefefefefefefeL, size, memory, constantSize, startOffset, manualUnroll, false, counters);
+    private static void fillWithGarbage(Word memory, int startOffset, long endOffset, boolean isEndOffsetConstant, boolean manualUnroll, Counters counters) {
+        fillMemory(0xfefefefefefefefeL, memory, startOffset, endOffset, isEndOffsetConstant, manualUnroll, false, counters);
     }
 
     /**
      * Formats some allocated memory with an object header and zeroes out the rest.
      */
-    private static Object formatObject(KlassPointer hub, long size, Word memory, Word compileTimePrototypeMarkWord, boolean fillContents, boolean constantSize, Counters counters) {
+    private static Object formatObject(KlassPointer hub,
+                    long size,
+                    Word memory,
+                    Word compileTimePrototypeMarkWord,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    boolean constantSize,
+                    Counters counters) {
         Word prototypeMarkWord = useBiasedLocking(INJECTED_VMCONFIG) ? hub.readWord(prototypeMarkWordOffset(INJECTED_VMCONFIG), PROTOTYPE_MARK_WORD_LOCATION) : compileTimePrototypeMarkWord;
         initializeObjectHeader(memory, prototypeMarkWord, hub);
         if (fillContents) {
-            zeroMemory(size, memory, constantSize, instanceHeaderSize(INJECTED_VMCONFIG), false, false, counters);
+            zeroMemory(memory, instanceHeaderSize(INJECTED_VMCONFIG), size, constantSize, false, false, counters);
         } else if (REPLACEMENTS_ASSERTIONS_ENABLED) {
-            fillWithGarbage(size, memory, constantSize, instanceHeaderSize(INJECTED_VMCONFIG), false, counters);
+            fillWithGarbage(memory, instanceHeaderSize(INJECTED_VMCONFIG), size, constantSize, false, counters);
         }
-        MembarNode.memoryBarrier(MemoryBarriers.STORE_STORE, LocationIdentity.init());
+        if (emitMemoryBarrier) {
+            MembarNode.memoryBarrier(MemoryBarriers.STORE_STORE, LocationIdentity.init());
+        }
         return memory.toObjectNonNull();
     }
 
@@ -674,11 +742,33 @@
         }
     }
 
+    @Snippet
+    private static void threadBeingInitializedCheck(@ConstantParameter Register threadRegister, KlassPointer klass) {
+        int state = readInstanceKlassInitState(klass);
+        if (state != instanceKlassStateBeingInitialized(INJECTED_VMCONFIG)) {
+            // The klass is no longer being initialized so force recompilation
+            DeoptimizeNode.deopt(InvalidateRecompile, RuntimeConstraint);
+        } else if (registerAsWord(threadRegister) != readInstanceKlassInitThread(klass)) {
+            // The klass is being initialized but this isn't the initializing thread so
+            // so deopt and allow execution to resume in the interpreter where it should block.
+            DeoptimizeNode.deopt(None, RuntimeConstraint);
+        }
+    }
+
     /**
      * Formats some allocated memory with an object header and zeroes out the rest.
      */
-    private static Object formatArray(KlassPointer hub, long allocationSize, int length, int headerSize, Word memory, Word prototypeMarkWord, boolean fillContents, boolean maybeUnroll,
-                    boolean useBulkZeroing, Counters counters) {
+    private static Object formatArray(KlassPointer hub,
+                    long allocationSize,
+                    int length,
+                    int headerSize,
+                    Word memory,
+                    Word prototypeMarkWord,
+                    boolean fillContents,
+                    boolean emitMemoryBarrier,
+                    boolean maybeUnroll,
+                    boolean supportsBulkZeroing,
+                    Counters counters) {
         memory.writeInt(arrayLengthOffset(INJECTED_VMCONFIG), length, LocationIdentity.init());
         /*
          * store hub last as the concurrent garbage collectors assume length is valid if hub field
@@ -686,11 +776,13 @@
          */
         initializeObjectHeader(memory, prototypeMarkWord, hub);
         if (fillContents) {
-            zeroMemory(allocationSize, memory, false, headerSize, maybeUnroll, useBulkZeroing, counters);
+            zeroMemory(memory, headerSize, allocationSize, false, maybeUnroll, supportsBulkZeroing, counters);
         } else if (REPLACEMENTS_ASSERTIONS_ENABLED) {
-            fillWithGarbage(allocationSize, memory, false, headerSize, maybeUnroll, counters);
+            fillWithGarbage(memory, headerSize, allocationSize, false, maybeUnroll, counters);
         }
-        MembarNode.memoryBarrier(MemoryBarriers.STORE_STORE, LocationIdentity.init());
+        if (emitMemoryBarrier) {
+            MembarNode.memoryBarrier(MemoryBarriers.STORE_STORE, LocationIdentity.init());
+        }
         return memory.toObjectNonNull();
     }
 
@@ -714,9 +806,10 @@
 
     public static class Templates extends AbstractTemplates {
 
-        private final SnippetInfo allocateInstance = snippet(NewObjectSnippets.class, "allocateInstance", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION, TLAB_END_LOCATION);
+        private final SnippetInfo allocateInstance = snippet(NewObjectSnippets.class, "allocateInstance", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION, TLAB_END_LOCATION,
+                        PROTOTYPE_MARK_WORD_LOCATION);
         private final SnippetInfo allocateInstancePIC = snippet(NewObjectSnippets.class, "allocateInstancePIC", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION,
-                        TLAB_END_LOCATION);
+                        TLAB_END_LOCATION, PROTOTYPE_MARK_WORD_LOCATION);
         private final SnippetInfo allocateArray = snippet(NewObjectSnippets.class, "allocateArray", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION, TLAB_END_LOCATION);
         private final SnippetInfo allocateArrayPIC = snippet(NewObjectSnippets.class, "allocateArrayPIC", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION, TLAB_END_LOCATION);
         private final SnippetInfo allocatePrimitiveArrayPIC = snippet(NewObjectSnippets.class, "allocatePrimitiveArrayPIC", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION,
@@ -724,10 +817,11 @@
         private final SnippetInfo allocateArrayDynamic = snippet(NewObjectSnippets.class, "allocateArrayDynamic", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION,
                         TLAB_END_LOCATION);
         private final SnippetInfo allocateInstanceDynamic = snippet(NewObjectSnippets.class, "allocateInstanceDynamic", MARK_WORD_LOCATION, HUB_WRITE_LOCATION, TLAB_TOP_LOCATION,
-                        TLAB_END_LOCATION);
+                        TLAB_END_LOCATION, PROTOTYPE_MARK_WORD_LOCATION, CLASS_INIT_STATE_LOCATION);
         private final SnippetInfo newmultiarray = snippet(NewObjectSnippets.class, "newmultiarray", TLAB_TOP_LOCATION, TLAB_END_LOCATION);
         private final SnippetInfo newmultiarrayPIC = snippet(NewObjectSnippets.class, "newmultiarrayPIC", TLAB_TOP_LOCATION, TLAB_END_LOCATION);
         private final SnippetInfo verifyHeap = snippet(NewObjectSnippets.class, "verifyHeap");
+        private final SnippetInfo threadBeingInitializedCheck = snippet(NewObjectSnippets.class, "threadBeingInitializedCheck");
         private final GraalHotSpotVMConfig config;
         private final Counters counters;
 
@@ -755,6 +849,7 @@
             args.add("hub", hub);
             args.add("prototypeMarkWord", type.prototypeMarkWord());
             args.addConst("fillContents", newInstanceNode.fillContents());
+            args.addConst("emitMemoryBarrier", newInstanceNode.emitMemoryBarrier());
             args.addConst("threadRegister", registers.getThreadRegister());
             args.addConst("constantSize", true);
             args.addConst("typeContext", ProfileAllocations.getValue(localOptions) ? type.toJavaName(false) : "");
@@ -798,10 +893,11 @@
             args.addConst("headerSize", headerSize);
             args.addConst("log2ElementSize", log2ElementSize);
             args.addConst("fillContents", newArrayNode.fillContents());
+            args.addConst("emitMemoryBarrier", newArrayNode.emitMemoryBarrier());
             args.addConst("threadRegister", registers.getThreadRegister());
             args.addConst("maybeUnroll", length.isConstant());
             args.addConst("typeContext", ProfileAllocations.getValue(localOptions) ? arrayType.toJavaName(false) : "");
-            args.addConst("useBulkZeroing", tool.getLowerer().supportBulkZeroing());
+            args.addConst("supportsBulkZeroing", tool.getLowerer().supportsBulkZeroing());
             args.addConst("counters", counters);
             SnippetTemplate template = template(newArrayNode, args);
             graph.getDebug().log("Lowering allocateArray in %s: node=%s, template=%s, arguments=%s", graph, newArrayNode, template, args);
@@ -815,6 +911,7 @@
             assert classClass != null;
             args.add("classClass", classClass);
             args.addConst("fillContents", newInstanceNode.fillContents());
+            args.addConst("emitMemoryBarrier", newInstanceNode.emitMemoryBarrier());
             args.addConst("threadRegister", registers.getThreadRegister());
             args.addConst("counters", counters);
 
@@ -832,6 +929,7 @@
             ValueNode length = newArrayNode.length();
             args.add("length", length.isAlive() ? length : graph.addOrUniqueWithInputs(length));
             args.addConst("fillContents", newArrayNode.fillContents());
+            args.addConst("emitMemoryBarrier", newArrayNode.emitMemoryBarrier());
             args.addConst("threadRegister", registers.getThreadRegister());
             /*
              * We use Kind.Illegal as a marker value instead of null because constant snippet
@@ -843,7 +941,7 @@
             } else {
                 args.addConst("knownLayoutHelper", 0);
             }
-            args.addConst("useBulkZeroing", tool.getLowerer().supportBulkZeroing());
+            args.addConst("supportsBulkZeroing", tool.getLowerer().supportsBulkZeroing());
             args.add("prototypeMarkWord", lookupArrayClass(tool, JavaKind.Object).prototypeMarkWord());
             args.addConst("counters", counters);
             SnippetTemplate template = template(newArrayNode, args);
@@ -890,5 +988,14 @@
                 GraphUtil.removeFixedWithUnusedInputs(verifyHeapNode);
             }
         }
+
+        public void lower(KlassBeingInitializedCheckNode verifyHeapNode, HotSpotRegistersProvider registers, LoweringTool tool) {
+            Arguments args = new Arguments(threadBeingInitializedCheck, verifyHeapNode.graph().getGuardsStage(), tool.getLoweringStage());
+            args.addConst("threadRegister", registers.getThreadRegister());
+            args.add("klass", verifyHeapNode.getKlass());
+
+            SnippetTemplate template = template(verifyHeapNode, args);
+            template.instantiate(providers.getMetaAccess(), verifyHeapNode, DEFAULT_REPLACER, args);
+        }
     }
 }