8209420: Track membars for volatile accesses so they can be properly optimized
Tue, 14 Aug 2018 16:54:47 +0200
changeset 51482 d7029542d67a
parent 51481 dae00d6705ec
child 51483 2d7bff7367c6
8209420: Track membars for volatile accesses so they can be properly optimized
Reviewed-by: adinn, aph, thartmann
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Wed Aug 22 15:10:40 2018 +0800
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Tue Aug 14 16:54:47 2018 +0200
@@ -1036,21 +1036,8 @@
-  // graph traversal helpers
-  MemBarNode *parent_membar(const Node *n);
-  MemBarNode *child_membar(const MemBarNode *n);
-  bool leading_membar(const MemBarNode *barrier);
-  bool is_card_mark_membar(const MemBarNode *barrier);
   bool is_CAS(int opcode);
-  MemBarNode *leading_to_normal(MemBarNode *leading);
-  MemBarNode *normal_to_leading(const MemBarNode *barrier);
-  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
-  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
-  MemBarNode *trailing_to_leading(const MemBarNode *trailing);
   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
   bool unnecessary_acquire(const Node *barrier);
@@ -1272,605 +1259,6 @@
   // relevant dmb instructions.
-  // graph traversal helpers used for volatile put/get and CAS
-  // optimization
-  // 1) general purpose helpers
-  // if node n is linked to a parent MemBarNode by an intervening
-  // Control and Memory ProjNode return the MemBarNode otherwise return
-  // NULL.
-  //
-  // n may only be a Load or a MemBar.
-  MemBarNode *parent_membar(const Node *n)
-  {
-    Node *ctl = NULL;
-    Node *mem = NULL;
-    Node *membar = NULL;
-    if (n->is_Load()) {
-      ctl = n->lookup(LoadNode::Control);
-      mem = n->lookup(LoadNode::Memory);
-    } else if (n->is_MemBar()) {
-      ctl = n->lookup(TypeFunc::Control);
-      mem = n->lookup(TypeFunc::Memory);
-    } else {
-	return NULL;
-    }
-    if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
-      return NULL;
-    }
-    membar = ctl->lookup(0);
-    if (!membar || !membar->is_MemBar()) {
-      return NULL;
-    }
-    if (mem->lookup(0) != membar) {
-      return NULL;
-    }
-    return membar->as_MemBar();
-  }
-  // if n is linked to a child MemBarNode by intervening Control and
-  // Memory ProjNodes return the MemBarNode otherwise return NULL.
-  MemBarNode *child_membar(const MemBarNode *n)
-  {
-    ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
-    ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);
-    // MemBar needs to have both a Ctl and Mem projection
-    if (! ctl || ! mem)
-      return NULL;
-    MemBarNode *child = NULL;
-    Node *x;
-    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
-      x = ctl->fast_out(i);
-      // if we see a membar we keep hold of it. we may also see a new
-      // arena copy of the original but it will appear later
-      if (x->is_MemBar()) {
-	  child = x->as_MemBar();
-	  break;
-      }
-    }
-    if (child == NULL) {
-      return NULL;
-    }
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      x = mem->fast_out(i);
-      // if we see a membar we keep hold of it. we may also see a new
-      // arena copy of the original but it will appear later
-      if (x == child) {
-	return child;
-      }
-    }
-    return NULL;
-  }
-  // helper predicate use to filter candidates for a leading memory
-  // barrier
-  //
-  // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
-  // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
-  bool leading_membar(const MemBarNode *barrier)
-  {
-    int opcode = barrier->Opcode();
-    // if this is a release membar we are ok
-    if (opcode == Op_MemBarRelease) {
-      return true;
-    }
-    // if its a cpuorder membar . . .
-    if (opcode != Op_MemBarCPUOrder) {
-      return false;
-    }
-    // then the parent has to be a release membar
-    MemBarNode *parent = parent_membar(barrier);
-    if (!parent) {
-      return false;
-    }
-    opcode = parent->Opcode();
-    return opcode == Op_MemBarRelease;
-  }
-  // 2) card mark detection helper
-  // helper predicate which can be used to detect a volatile membar
-  // introduced as part of a conditional card mark sequence either by
-  // G1 or by CMS when UseCondCardMark is true.
-  //
-  // membar can be definitively determined to be part of a card mark
-  // sequence if and only if all the following hold
-  //
-  // i) it is a MemBarVolatile
-  //
-  // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
-  // true
-  //
-  // iii) the node's Mem projection feeds a StoreCM node.
-  bool is_card_mark_membar(const MemBarNode *barrier)
-  {
-    if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
-      return false;
-    }
-    if (barrier->Opcode() != Op_MemBarVolatile) {
-      return false;
-    }
-    ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
-      Node *y = mem->fast_out(i);
-      if (y->Opcode() == Op_StoreCM) {
-	return true;
-      }
-    }
-    return false;
-  }
-  // 3) helper predicates to traverse volatile put or CAS graphs which
-  // may contain GC barrier subgraphs
-  // Preamble
-  // --------
-  //
-  // for volatile writes we can omit generating barriers and employ a
-  // releasing store when we see a node sequence sequence with a
-  // leading MemBarRelease and a trailing MemBarVolatile as follows
-  //
-  //   MemBarRelease
-  //  {      ||      } -- optional
-  //  {MemBarCPUOrder}
-  //         ||     \\
-  //         ||     StoreX[mo_release]
-  //         | \     /
-  //         | MergeMem
-  //         | /
-  //  {MemBarCPUOrder} -- optional
-  //  {      ||      }
-  //   MemBarVolatile
-  //
-  // where
-  //  || and \\ represent Ctl and Mem feeds via Proj nodes
-  //  | \ and / indicate further routing of the Ctl and Mem feeds
-  //
-  // this is the graph we see for non-object stores. however, for a
-  // volatile Object store (StoreN/P) we may see other nodes below the
-  // leading membar because of the need for a GC pre- or post-write
-  // barrier.
-  //
-  // with most GC configurations we with see this simple variant which
-  // includes a post-write barrier card mark.
-  //
-  //   MemBarRelease______________________________
-  //         ||    \\               Ctl \        \\
-  //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
-  //         | \     /                       . . .  /
-  //         | MergeMem
-  //         | /
-  //         ||      /
-  //  {MemBarCPUOrder} -- optional
-  //  {      ||      }
-  //   MemBarVolatile
-  //
-  // i.e. the leading membar feeds Ctl to a CastP2X (which converts
-  // the object address to an int used to compute the card offset) and
-  // Ctl+Mem to a StoreB node (which does the actual card mark).
-  //
-  // n.b. a StoreCM node will only appear in this configuration when
-  // using CMS or G1. StoreCM differs from a normal card mark write (StoreB)
-  // because it implies a requirement to order visibility of the card
-  // mark (StoreCM) relative to the object put (StoreP/N) using a
-  // StoreStore memory barrier (arguably this ought to be represented
-  // explicitly in the ideal graph but that is not how it works). This
-  // ordering is required for both non-volatile and volatile
-  // puts. Normally that means we need to translate a StoreCM using
-  // the sequence
-  //
-  //   dmb ishst
-  //   strb
-  //
-  // However, when using G1 or CMS with conditional card marking (as
-  // we shall see) we don't need to insert the dmb when translating
-  // StoreCM because there is already an intervening StoreLoad barrier
-  // between it and the StoreP/N.
-  //
-  // It is also possible to perform the card mark conditionally on it
-  // currently being unmarked in which case the volatile put graph
-  // will look slightly different
-  //
-  //   MemBarRelease____________________________________________
-  //         ||    \\               Ctl \     Ctl \     \\  Mem \
-  //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
-  //         | \     /                              \            |
-  //         | MergeMem                            . . .      StoreB
-  //         | /                                                /
-  //         ||     /
-  //   MemBarVolatile
-  //
-  // It is worth noting at this stage that both the above
-  // configurations can be uniquely identified by checking that the
-  // memory flow includes the following subgraph:
-  //
-  //   MemBarRelease
-  //  {MemBarCPUOrder}
-  //          |  \      . . .
-  //          |  StoreX[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //  {MemBarCPUOrder}
-  //   MemBarVolatile
-  //
-  // This is referred to as a *normal* subgraph. It can easily be
-  // detected starting from any candidate MemBarRelease,
-  // StoreX[mo_release] or MemBarVolatile.
-  //
-  // A simple variation on this normal case occurs for an unsafe CAS
-  // operation. The basic graph for a non-object CAS is
-  //
-  //   MemBarRelease
-  //         ||
-  //   MemBarCPUOrder
-  //         ||     \\   . . .
-  //         ||     CompareAndSwapX
-  //         ||       |
-  //         ||     SCMemProj
-  //         | \     /
-  //         | MergeMem
-  //         | /
-  //   MemBarCPUOrder
-  //         ||
-  //   MemBarAcquire
-  //
-  // The same basic variations on this arrangement (mutatis mutandis)
-  // occur when a card mark is introduced. i.e. we se the same basic
-  // shape but the StoreP/N is replaced with CompareAndSawpP/N and the
-  // tail of the graph is a pair comprising a MemBarCPUOrder +
-  // MemBarAcquire.
-  //
-  // So, in the case of a CAS the normal graph has the variant form
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder
-  //          |   \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |    |
-  //          |   SCMemProj
-  //          |   /  . . .
-  //         MergeMem
-  //          |
-  //   MemBarCPUOrder
-  //   MemBarAcquire
-  //
-  // This graph can also easily be detected starting from any
-  // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
-  //
-  // the code below uses two helper predicates, leading_to_normal and
-  // normal_to_leading to identify these normal graphs, one validating
-  // the layout starting from the top membar and searching down and
-  // the other validating the layout starting from the lower membar
-  // and searching up.
-  //
-  // There are two special case GC configurations when a normal graph
-  // may not be generated: when using G1 (which always employs a
-  // conditional card mark); and when using CMS with conditional card
-  // marking configured. These GCs are both concurrent rather than
-  // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
-  // graph between the leading and trailing membar nodes, in
-  // particular enforcing stronger memory serialisation beween the
-  // object put and the corresponding conditional card mark. CMS
-  // employs a post-write GC barrier while G1 employs both a pre- and
-  // post-write GC barrier. Of course the extra nodes may be absent --
-  // they are only inserted for object puts/swaps. This significantly
-  // complicates the task of identifying whether a MemBarRelease,
-  // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
-  // when using these GC configurations (see below). It adds similar
-  // complexity to the task of identifying whether a MemBarRelease,
-  // CompareAndSwapX or MemBarAcquire forms part of a CAS.
-  //
-  // In both cases the post-write subtree includes an auxiliary
-  // MemBarVolatile (StoreLoad barrier) separating the object put/swap
-  // and the read of the corresponding card. This poses two additional
-  // problems.
-  //
-  // Firstly, a card mark MemBarVolatile needs to be distinguished
-  // from a normal trailing MemBarVolatile. Resolving this first
-  // problem is straightforward: a card mark MemBarVolatile always
-  // projects a Mem feed to a StoreCM node and that is a unique marker
-  //
-  //      MemBarVolatile (card mark)
-  //       C |    \     . . .
-  //         |   StoreCM   . . .
-  //       . . .
-  //
-  // The second problem is how the code generator is to translate the
-  // card mark barrier? It always needs to be translated to a "dmb
-  // ish" instruction whether or not it occurs as part of a volatile
-  // put. A StoreLoad barrier is needed after the object put to ensure
-  // i) visibility to GC threads of the object put and ii) visibility
-  // to the mutator thread of any card clearing write by a GC
-  // thread. Clearly a normal store (str) will not guarantee this
-  // ordering but neither will a releasing store (stlr). The latter
-  // guarantees that the object put is visible but does not guarantee
-  // that writes by other threads have also been observed.
-  //
-  // So, returning to the task of translating the object put and the
-  // leading/trailing membar nodes: what do the non-normal node graph
-  // look like for these 2 special cases? and how can we determine the
-  // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
-  // in both normal and non-normal cases?
-  //
-  // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
-  // which selects conditonal execution based on the value loaded
-  // (LoadB) from the card. Ctl and Mem are fed to the If via an
-  // intervening StoreLoad barrier (MemBarVolatile).
-  //
-  // So, with CMS we may see a node graph for a volatile object store
-  // which looks like this
-  //
-  //   MemBarRelease
-  //  {MemBarCPUOrder}_(leading)_________________
-  //     C |    M \       \\                   C \
-  //       |       \    StoreN/P[mo_release]  CastP2X
-  //       |    Bot \    /
-  //       |       MergeMem
-  //       |         /
-  //      MemBarVolatile (card mark)
-  //     C |  ||    M |
-  //       | LoadB    |
-  //       |   |      |
-  //       | Cmp      |\
-  //       | /        | \
-  //       If         |  \
-  //       | \        |   \
-  // IfFalse  IfTrue  |    \
-  //       \     / \  |     \
-  //        \   / StoreCM    |
-  //         \ /      |      |
-  //        Region   . . .   |
-  //          | \           /
-  //          |  . . .  \  / Bot
-  //          |       MergeMem
-  //          |          |
-  //       {MemBarCPUOrder}
-  //        MemBarVolatile (trailing)
-  //
-  // The first MergeMem merges the AliasIdxBot Mem slice from the
-  // leading membar and the oopptr Mem slice from the Store into the
-  // card mark membar. The trailing MergeMem merges the AliasIdxBot
-  // Mem slice from the card mark membar and the AliasIdxRaw slice
-  // from the StoreCM into the trailing membar (n.b. the latter
-  // proceeds via a Phi associated with the If region).
-  //
-  // The graph for a CAS varies slightly, the difference being
-  // that the StoreN/P node is replaced by a CompareAndSwapP/N node
-  // and the trailing MemBarVolatile by a MemBarCPUOrder +
-  // MemBarAcquire pair (also the MemBarCPUOrder nodes are not optional).
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder_(leading)_______________
-  //     C |    M \       \\                C \
-  //       |       \    CompareAndSwapN/P  CastP2X
-  //       |        \      |
-  //       |         \   SCMemProj
-  //       |      Bot \   /
-  //       |        MergeMem
-  //       |         /
-  //      MemBarVolatile (card mark)
-  //     C |  ||    M |
-  //       | LoadB    |
-  //       |   |      |
-  //       | Cmp      |\
-  //       | /        | \
-  //       If         |  \
-  //       | \        |   \
-  // IfFalse  IfTrue  |    \
-  //       \     / \  |     \
-  //        \   / StoreCM    |
-  //         \ /      |      |
-  //        Region   . . .   |
-  //          | \           /
-  //          |  . . .  \  / Bot
-  //          |       MergeMem
-  //          |          |
-  //        MemBarCPUOrder
-  //        MemBarVolatile (trailing)
-  //
-  //
-  // G1 is quite a lot more complicated. The nodes inserted on behalf
-  // of G1 may comprise: a pre-write graph which adds the old value to
-  // the SATB queue; the releasing store itself; and, finally, a
-  // post-write graph which performs a card mark.
-  //
-  // The pre-write graph may be omitted, but only when the put is
-  // writing to a newly allocated (young gen) object and then only if
-  // there is a direct memory chain to the Initialize node for the
-  // object allocation. This will not happen for a volatile put since
-  // any memory chain passes through the leading membar.
-  //
-  // The pre-write graph includes a series of 3 If tests. The outermost
-  // If tests whether SATB is enabled (no else case). The next If tests
-  // whether the old value is non-NULL (no else case). The third tests
-  // whether the SATB queue index is > 0, if so updating the queue. The
-  // else case for this third If calls out to the runtime to allocate a
-  // new queue buffer.
-  //
-  // So with G1 the pre-write and releasing store subgraph looks like
-  // this (the nested Ifs are omitted).
-  //
-  //  MemBarRelease
-  // {MemBarCPUOrder}_(leading)___________
-  //     C |  ||  M \   M \    M \  M \ . . .
-  //       | LoadB   \  LoadL  LoadN   \
-  //       | /        \                 \
-  //       If         |\                 \
-  //       | \        | \                 \
-  //  IfFalse  IfTrue |  \                 \
-  //       |     |    |   \                 |
-  //       |     If   |   /\                |
-  //       |     |          \               |
-  //       |                 \              |
-  //       |    . . .         \             |
-  //       | /       | /       |            |
-  //      Region  Phi[M]       |            |
-  //       | \       |         |            |
-  //       |  \_____ | ___     |            |
-  //     C | C \     |   C \ M |            |
-  //       | CastP2X | StoreN/P[mo_release] |
-  //       |         |         |            |
-  //     C |       M |       M |          M |
-  //        \        |         |           /
-  //                  . . .
-  //          (post write subtree elided)
-  //                    . . .
-  //             C \         M /
-  //                \         /
-  //             {MemBarCPUOrder}
-  //              MemBarVolatile (trailing)
-  //
-  // n.b. the LoadB in this subgraph is not the card read -- it's a
-  // read of the SATB queue active flag.
-  //
-  // The G1 post-write subtree is also optional, this time when the
-  // new value being written is either null or can be identified as a
-  // newly allocated (young gen) object with no intervening control
-  // flow. The latter cannot happen but the former may, in which case
-  // the card mark membar is omitted and the memory feeds form the
-  // leading membar and the SToreN/P are merged direct into the
-  // trailing membar as per the normal subgraph. So, the only special
-  // case which arises is when the post-write subgraph is generated.
-  //
-  // The kernel of the post-write G1 subgraph is the card mark itself
-  // which includes a card mark memory barrier (MemBarVolatile), a
-  // card test (LoadB), and a conditional update (If feeding a
-  // StoreCM). These nodes are surrounded by a series of nested Ifs
-  // which try to avoid doing the card mark. The top level If skips if
-  // the object reference does not cross regions (i.e. it tests if
-  // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
-  // need not be recorded. The next If, which skips on a NULL value,
-  // may be absent (it is not generated if the type of value is >=
-  // OopPtr::NotNull). The 3rd If skips writes to young regions (by
-  // checking if card_val != young).  n.b. although this test requires
-  // a pre-read of the card it can safely be done before the StoreLoad
-  // barrier. However that does not bypass the need to reread the card
-  // after the barrier. A final, 4th If tests if the card is already
-  // marked.
-  //
-  //                (pre-write subtree elided)
-  //        . . .                  . . .    . . .  . . .
-  //        C |                    M |     M |    M |
-  //       Region                  Phi[M] StoreN    |
-  //          |                     / \      |      |
-  //         / \_______            /   \     |      |
-  //      C / C \      . . .            \    |      |
-  //       If   CastP2X . . .            |   |      |
-  //       / \                           |   |      |
-  //      /   \                          |   |      |
-  // IfFalse IfTrue                      |   |      |
-  //   |       |                         |   |     /|
-  //   |       If                        |   |    / |
-  //   |      / \                        |   |   /  |
-  //   |     /   \                        \  |  /   |
-  //   | IfFalse IfTrue                   MergeMem  |
-  //   |  . . .    / \                       /      |
-  //   |          /   \                     /       |
-  //   |     IfFalse IfTrue                /        |
-  //   |      . . .    |                  /         |
-  //   |               If                /          |
-  //   |               / \              /           |
-  //   |              /   \            /            |
-  //   |         IfFalse IfTrue       /             |
-  //   |           . . .   |         /              |
-  //   |                    \       /               |
-  //   |                     \     /                |
-  //   |             MemBarVolatile__(card mark)    |
-  //   |                ||   C |  M \  M \          |
-  //   |               LoadB   If    |    |         |
-  //   |                      / \    |    |         |
-  //   |                     . . .   |    |         |
-  //   |                          \  |    |        /
-  //   |                        StoreCM   |       /
-  //   |                          . . .   |      /
-  //   |                        _________/      /
-  //   |                       /  _____________/
-  //   |   . . .       . . .  |  /            /
-  //   |    |                 | /   _________/
-  //   |    |               Phi[M] /        /
-  //   |    |                 |   /        /
-  //   |    |                 |  /        /
-  //   |  Region  . . .     Phi[M]  _____/
-  //   |    /                 |    /
-  //   |                      |   /
-  //   | . . .   . . .        |  /
-  //   | /                    | /
-  // Region           |  |  Phi[M]
-  //   |              |  |  / Bot
-  //    \            MergeMem
-  //     \            /
-  //    {MemBarCPUOrder}
-  //     MemBarVolatile
-  //
-  // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
-  // from the leading membar and the oopptr Mem slice from the Store
-  // into the card mark membar i.e. the memory flow to the card mark
-  // membar still looks like a normal graph.
-  //
-  // The trailing MergeMem merges an AliasIdxBot Mem slice with other
-  // Mem slices (from the StoreCM and other card mark queue stores).
-  // However in this case the AliasIdxBot Mem slice does not come
-  // direct from the card mark membar. It is merged through a series
-  // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
-  // from the leading membar with the Mem feed from the card mark
-  // membar. Each Phi corresponds to one of the Ifs which may skip
-  // around the card mark membar. So when the If implementing the NULL
-  // value check has been elided the total number of Phis is 2
-  // otherwise it is 3.
-  //
-  // The CAS graph when using G1GC also includes a pre-write subgraph
-  // and an optional post-write subgraph. The same variations are
-  // introduced as for CMS with conditional card marking i.e. the
-  // StoreP/N is swapped for a CompareAndSwapP/N with a following
-  // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
-  // MemBarAcquire pair. There may be an extra If test introduced in
-  // the CAS case, when the boolean result of the CAS is tested by the
-  // caller. In that case an extra Region and AliasIdxBot Phi may be
-  // introduced before the MergeMem
-  //
-  // So, the upshot is that in all cases the subgraph will include a
-  // *normal* memory subgraph betwen the leading membar and its child
-  // membar: either a normal volatile put graph including a releasing
-  // StoreX and terminating with a trailing volatile membar or card
-  // mark volatile membar; or a normal CAS graph including a
-  // CompareAndSwapX + SCMemProj pair and terminating with a card mark
-  // volatile membar or a trailing cpu order and acquire membar
-  // pair. If the child membar is not a (volatile) card mark membar
-  // then it marks the end of the volatile put or CAS subgraph. If the
-  // child is a card mark membar then the normal subgraph will form
-  // part of a larger volatile put or CAS subgraph if and only if the
-  // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
-  // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
-  // Phi nodes merging the leading barrier memory flow (for G1).
-  //
-  // The predicates controlling generation of instructions for store
-  // and barrier nodes employ a few simple helper functions (described
-  // below) which identify the presence or absence of all these
-  // subgraph configurations and provide a means of traversing from
-  // one node in the subgraph to another.
   // is_CAS(int opcode)
   // return true if opcode is one of the possible CompareAndSwapX
@@ -1910,674 +1298,7 @@
   // traverse when searching from a card mark membar for the merge mem
   // feeding a trailing membar or vice versa
-  int max_phis()
-  {
-    if (UseG1GC) {
-      return 4;
-    } else if (UseConcMarkSweepGC && UseCondCardMark) {
-      return 1;
-    } else {
-      return 0;
-    }
-  }
-  // leading_to_normal
-  //
-  // graph traversal helper which detects the normal case Mem feed
-  // from a release membar (or, optionally, its cpuorder child) to a
-  // dependent volatile or acquire membar i.e. it ensures that one of
-  // the following 3 Mem flow subgraphs is present.
-  //
-  //   MemBarRelease
-  //  {MemBarCPUOrder} {leading}
-  //          |  \      . . .
-  //          |  StoreN/P[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //  {MemBarCPUOrder}
-  //   MemBarVolatile {trailing or card mark}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarVolatile {card mark}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarCPUOrder
-  //   MemBarAcquire {trailing}
-  //
-  // if the correct configuration is present returns the trailing
-  // or cardmark membar otherwise NULL.
-  //
-  // the input membar is expected to be either a cpuorder membar or a
-  // release membar. in the latter case it should not have a cpu membar
-  // child.
-  //
-  // the returned value may be a card mark or trailing membar
-  //
-  MemBarNode *leading_to_normal(MemBarNode *leading)
-  {
-    assert((leading->Opcode() == Op_MemBarRelease ||
-	    leading->Opcode() == Op_MemBarCPUOrder),
-	   "expecting a volatile or cpuroder membar!");
-    // check the mem flow
-    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
-    if (!mem) {
-      return NULL;
-    }
-    Node *x = NULL;
-    StoreNode * st = NULL;
-    LoadStoreNode *cas = NULL;
-    MergeMemNode *mm = NULL;
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      x = mem->fast_out(i);
-      if (x->is_MergeMem()) {
-	if (mm != NULL) {
-	  return NULL;
-	}
-	// two merge mems is one too many
-	mm = x->as_MergeMem();
-      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
-	// two releasing stores/CAS nodes is one too many
-	if (st != NULL || cas != NULL) {
-	  return NULL;
-	}
-	st = x->as_Store();
-      } else if (is_CAS(x->Opcode())) {
-	if (st != NULL || cas != NULL) {
-	  return NULL;
-	}
-	cas = x->as_LoadStore();
-      }
-    }
-    // must have a store or a cas
-    if (!st && !cas) {
-      return NULL;
-    }
-    // must have a merge
-    if (!mm) {
-      return NULL;
-    }
-    Node *feed = NULL;
-    if (cas) {
-      // look for an SCMemProj
-      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
-	x = cas->fast_out(i);
-        if (x->Opcode() == Op_SCMemProj) {
-	  feed = x;
-	  break;
-	}
-      }
-      if (feed == NULL) {
-	return NULL;
-      }
-    } else {
-      feed = st;
-    }
-    // ensure the feed node feeds the existing mergemem;
-    for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
-      x = feed->fast_out(i);
-      if (x == mm) {
-        break;
-      }
-    }
-    if (x != mm) {
-      return NULL;
-    }
-    MemBarNode *mbar = NULL;
-    // ensure the merge feeds to the expected type of membar
-    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
-      x = mm->fast_out(i);
-      if (x->is_MemBar()) {
-        if (x->Opcode() == Op_MemBarCPUOrder) {
-          // with a store any cpu order membar should precede a
-          // trailing volatile membar. with a cas it should precede a
-          // trailing acquire membar. in either case try to skip to
-          // that next membar
-	  MemBarNode *y =  x->as_MemBar();
-	  y = child_membar(y);
-	  if (y != NULL) {
-            // skip to this new membar to do the check
-	    x = y;
-	  }
-        }
-	if (x->Opcode() == Op_MemBarVolatile) {
-	  mbar = x->as_MemBar();
-          // for a volatile store this can be either a trailing membar
-          // or a card mark membar. for a cas it must be a card mark
-          // membar
-          guarantee(cas == NULL || is_card_mark_membar(mbar),
-                    "in CAS graph volatile membar must be a card mark");
-	} else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
-	  mbar = x->as_MemBar();
-	}
-	break;
-      }
-    }
-    return mbar;
-  }
-  // normal_to_leading
-  //
-  // graph traversal helper which detects the normal case Mem feed
-  // from either a card mark or a trailing membar to a preceding
-  // release membar (optionally its cpuorder child) i.e. it ensures
-  // that one of the following 3 Mem flow subgraphs is present.
-  //
-  //   MemBarRelease
-  //  {MemBarCPUOrder} {leading}
-  //          |  \      . . .
-  //          |  StoreN/P[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //  {MemBarCPUOrder}
-  //   MemBarVolatile {trailing or card mark}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarVolatile {card mark}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarCPUOrder
-  //   MemBarAcquire {trailing}
-  //
-  // this predicate checks for the same flow as the previous predicate
-  // but starting from the bottom rather than the top.
-  //
-  // if the configuration is present returns the cpuorder member for
-  // preference or when absent the release membar otherwise NULL.
-  //
-  // n.b. the input membar is expected to be a MemBarVolatile but
-  // need not be a card mark membar.
-  MemBarNode *normal_to_leading(const MemBarNode *barrier)
-  {
-    // input must be a volatile membar
-    assert((barrier->Opcode() == Op_MemBarVolatile ||
-	    barrier->Opcode() == Op_MemBarAcquire),
-	   "expecting a volatile or an acquire membar");
-    bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;
-    // if we have an intervening cpu order membar then start the
-    // search from it
-    Node *x = parent_membar(barrier);
-    if (x == NULL) {
-      // stick with the original barrier
-      x = (Node *)barrier;
-    } else if (x->Opcode() != Op_MemBarCPUOrder) {
-      // any other barrier means this is not the graph we want
-      return NULL;
-    }
-    // the Mem feed to the membar should be a merge
-    x = x ->in(TypeFunc::Memory);
-    if (!x->is_MergeMem())
-      return NULL;
-    MergeMemNode *mm = x->as_MergeMem();
-    // the merge should get its Bottom mem feed from the leading membar
-    x = mm->in(Compile::AliasIdxBot);
-    // ensure this is a non control projection
-    if (!x->is_Proj() || x->is_CFG()) {
-      return NULL;
-    }
-    // if it is fed by a membar that's the one we want
-    x = x->in(0);
-    if (!x->is_MemBar()) {
-      return NULL;
-    }
-    MemBarNode *leading = x->as_MemBar();
-    // reject invalid candidates
-    if (!leading_membar(leading)) {
-      return NULL;
-    }
-    // ok, we have a leading membar, now for the sanity clauses
-    // the leading membar must feed Mem to a releasing store or CAS
-    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
-    StoreNode *st = NULL;
-    LoadStoreNode *cas = NULL;
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      x = mem->fast_out(i);
-      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
-	// two stores or CASes is one too many
-	if (st != NULL || cas != NULL) {
-	  return NULL;
-	}
-	st = x->as_Store();
-      } else if (is_CAS(x->Opcode())) {
-	if (st != NULL || cas != NULL) {
-	  return NULL;
-	}
-	cas = x->as_LoadStore();
-      }
-    }
-    // we cannot have both a store and a cas
-    if (st == NULL && cas == NULL) {
-      // we have neither -- this is not a normal graph
-      return NULL;
-    }
-    if (st == NULL) {
-      // if we started from a volatile membar and found a CAS then the
-      // original membar ought to be for a card mark
-      guarantee((barrier_is_acquire || is_card_mark_membar(barrier)),
-                "unexpected volatile barrier (i.e. not card mark) in CAS graph");
-      // check that the CAS feeds the merge we used to get here via an
-      // intermediary SCMemProj
-      Node *scmemproj = NULL;
-      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
-        x = cas->fast_out(i);
-        if (x->Opcode() == Op_SCMemProj) {
-          scmemproj = x;
-          break;
-        }
-      }
-      if (scmemproj == NULL) {
-        return NULL;
-      }
-      for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
-        x = scmemproj->fast_out(i);
-        if (x == mm) {
-          return leading;
-        }
-      }
-    } else {
-      // we should not have found a store if we started from an acquire
-      guarantee(!barrier_is_acquire,
-                "unexpected trailing acquire barrier in volatile store graph");
-      // the store should feed the merge we used to get here
-      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
-	if (st->fast_out(i) == mm) {
-	  return leading;
-	}
-      }
-    }
-    return NULL;
-  }
-  // card_mark_to_trailing
-  //
-  // graph traversal helper which detects extra, non-normal Mem feed
-  // from a card mark volatile membar to a trailing membar i.e. it
-  // ensures that one of the following four GC post-write Mem flow
-  // subgraphs is present.
-  //
-  // 1)
-  //     . . .
-  //       |
-  //   MemBarVolatile (card mark)
-  //      |          |
-  //      |        StoreCM
-  //      |          |
-  //      |        . . .
-  //  Bot |  /
-  //   MergeMem
-  //      |
-  //   {MemBarCPUOrder}            OR  MemBarCPUOrder
-  //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
-  //                                 
-  //
-  // 2)
-  //   MemBarRelease/CPUOrder (leading)
-  //    |
-  //    |
-  //    |\       . . .
-  //    | \        |
-  //    |  \  MemBarVolatile (card mark)
-  //    |   \   |     |
-  //     \   \  |   StoreCM    . . .
-  //      \   \ |
-  //       \  Phi
-  //        \ /
-  //        Phi  . . .
-  //     Bot |   /
-  //       MergeMem
-  //         |
-  //   {MemBarCPUOrder}            OR  MemBarCPUOrder
-  //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
-  //
-  // 3)
-  //   MemBarRelease/CPUOrder (leading)
-  //    |
-  //    |\
-  //    | \
-  //    |  \      . . .
-  //    |   \       |
-  //    |\   \  MemBarVolatile (card mark)
-  //    | \   \   |     |
-  //    |  \   \  |   StoreCM    . . .
-  //    |   \   \ |
-  //     \   \  Phi
-  //      \   \ /
-  //       \  Phi
-  //        \ /
-  //        Phi  . . .
-  //     Bot |   /
-  //       MergeMem
-  //         |
-  //         |
-  //   {MemBarCPUOrder}            OR  MemBarCPUOrder
-  //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
-  //
-  // 4)
-  //   MemBarRelease/CPUOrder (leading)
-  //    |
-  //    |\
-  //    | \
-  //    |  \
-  //    |   \
-  //    |\   \
-  //    | \   \
-  //    |  \   \        . . .
-  //    |   \   \         |
-  //    |\   \   \   MemBarVolatile (card mark)
-  //    | \   \   \   /   |
-  //    |  \   \   \ /  StoreCM    . . .
-  //    |   \   \  Phi
-  //     \   \   \ /
-  //      \   \  Phi
-  //       \   \ /
-  //        \  Phi
-  //         \ /
-  //         Phi  . . .
-  //      Bot |   /
-  //       MergeMem
-  //          |
-  //          |
-  //    MemBarCPUOrder
-  //    MemBarAcquire {trailing}
-  //
-  // configuration 1 is only valid if UseConcMarkSweepGC &&
-  // UseCondCardMark
-  //
-  // configuration 2, is only valid if UseConcMarkSweepGC &&
-  // UseCondCardMark or if UseG1GC
-  //
-  // configurations 3 and 4 are only valid if UseG1GC.
-  //
-  // if a valid configuration is present returns the trailing membar
-  // otherwise NULL.
-  //
-  // n.b. the supplied membar is expected to be a card mark
-  // MemBarVolatile i.e. the caller must ensure the input node has the
-  // correct operand and feeds Mem to a StoreCM node
-  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
-  {
-    // input must be a card mark volatile membar
-    assert(is_card_mark_membar(barrier), "expecting a card mark membar");
-    Node *feed = barrier->proj_out(TypeFunc::Memory);
-    Node *x;
-    MergeMemNode *mm = NULL;
-    const int MAX_PHIS = max_phis(); // max phis we will search through
-    int phicount = 0;                // current search count
-    bool retry_feed = true;
-    while (retry_feed) {
-      // see if we have a direct MergeMem feed
-      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
-	x = feed->fast_out(i);
-	// a direct feed shows up as a MergeMem among the feed's users
-	if (x->is_MergeMem()) {
-	  mm = x->as_MergeMem();
-	  break;
-	}
-      }
-      if (mm) {
-	retry_feed = false;
-      } else if (phicount++ < MAX_PHIS) {
-	// the barrier may feed indirectly via one or two Phi nodes
-	PhiNode *phi = NULL;
-	for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
-	  x = feed->fast_out(i);
-	  // the correct Phi will be merging a Bot memory slice
-	  if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
-	    phi = x->as_Phi();
-	    break;
-	  }
-	}
-	if (!phi) {
-	  return NULL;
-	}
-	// look for another merge below this phi
-	feed = phi;
-      } else {
-	// couldn't find a merge
-	return NULL;
-      }
-    }
-    // sanity check this feed turns up as the expected slice
-    guarantee(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
-    MemBarNode *trailing = NULL;
-    // be sure we have a trailing membar fed by the merge
-    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
-      x = mm->fast_out(i);
-      if (x->is_MemBar()) {
-        // if this is an intervening cpu order membar skip to the
-        // following membar
-        if (x->Opcode() == Op_MemBarCPUOrder) {
-          MemBarNode *y =  x->as_MemBar();
-          y = child_membar(y);
-          if (y != NULL) {
-            x = y;
-          }
-        }
-        if (x->Opcode() == Op_MemBarVolatile ||
-            x->Opcode() == Op_MemBarAcquire) {
-          trailing = x->as_MemBar();
-        }
-        break;
-      }
-    }
-    return trailing;
-  }
-  // trailing_to_card_mark
-  //
-  // graph traversal helper which detects extra, non-normal Mem feed
-  // from a trailing volatile membar to a preceding card mark volatile
-  // membar i.e. it identifies whether one of the four possible extra
-  // GC post-write Mem flow subgraphs is present
-  //
-  // this predicate checks for the same flow as the previous predicate
-  // but starting from the bottom rather than the top.
-  //
-  // if the configuration is present returns the card mark membar
-  // otherwise NULL
-  //
-  // n.b. the supplied membar is expected to be a trailing
-  // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
-  // input node has the correct opcode
-  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
-  {
-    assert(trailing->Opcode() == Op_MemBarVolatile ||
-           trailing->Opcode() == Op_MemBarAcquire,
-	   "expecting a volatile or acquire membar");
-    assert(!is_card_mark_membar(trailing),
-	   "not expecting a card mark membar");
-    Node *x = (Node *)trailing;
-    // look for a preceding cpu order membar
-    MemBarNode *y = parent_membar(x->as_MemBar());
-    if (y != NULL) {
-      // make sure it is a cpu order membar
-      if (y->Opcode() != Op_MemBarCPUOrder) {
-        // this is not the graph we were looking for
-        return NULL;
-      }
-      // start the search from here
-      x = y;
-    }
-    // the Mem feed to the membar should be a merge
-    x = x->in(TypeFunc::Memory);
-    if (!x->is_MergeMem()) {
-      return NULL;
-    }
-    MergeMemNode *mm = x->as_MergeMem();
-    x = mm->in(Compile::AliasIdxBot);
-    // with G1 we may possibly see a Phi or two before we see a Memory
-    // Proj from the card mark membar
-    const int MAX_PHIS = max_phis(); // max phis we will search through
-    int phicount = 0;                    // current search count
-    bool retry_feed = !x->is_Proj();
-    while (retry_feed) {
-      if (x->is_Phi() && phicount++ < MAX_PHIS) {
-	PhiNode *phi = x->as_Phi();
-	ProjNode *proj = NULL;
-	PhiNode *nextphi = NULL;
-	bool found_leading = false;
-	for (uint i = 1; i < phi->req(); i++) {
-	  x = phi->in(i);
-	  if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
-	    nextphi = x->as_Phi();
-	  } else if (x->is_Proj()) {
-	    int opcode = x->in(0)->Opcode();
-	    if (opcode == Op_MemBarVolatile) {
-	      proj = x->as_Proj();
-	    } else if (opcode == Op_MemBarRelease ||
-		       opcode == Op_MemBarCPUOrder) {
-	      // probably a leading membar
-	      found_leading = true;
-	    }
-	  }
-	}
-	// if we found a correct looking proj then retry from there
-	// otherwise we must see a leading and a phi or this is the
-	// wrong config
-	if (proj != NULL) {
-	  x = proj;
-	  retry_feed = false;
-	} else if (found_leading && nextphi != NULL) {
-	  // retry from this phi to check phi2
-	  x = nextphi;
-	} else {
-	  // not what we were looking for
-	  return NULL;
-	}
-      } else {
-	return NULL;
-      }
-    }
-    // the proj has to come from the card mark membar
-    x = x->in(0);
-    if (!x->is_MemBar()) {
-      return NULL;
-    }
-    MemBarNode *card_mark_membar = x->as_MemBar();
-    if (!is_card_mark_membar(card_mark_membar)) {
-      return NULL;
-    }
-    return card_mark_membar;
-  }
-  // trailing_to_leading
-  //
-  // graph traversal helper which checks the Mem flow up the graph
-  // from a (non-card mark) trailing membar attempting to locate and
-  // return an associated leading membar. it first looks for a
-  // subgraph in the normal configuration (relying on helper
-  // normal_to_leading). failing that it then looks for one of the
-  // possible post-write card mark subgraphs linking the trailing node
-  // to the card mark membar (relying on helper
-  // trailing_to_card_mark), and then checks that the card mark membar
-  // is fed by a leading membar (once again relying on auxiliary
-  // predicate normal_to_leading).
-  //
-  // if the configuration is valid returns the cpuorder member for
-  // preference or when absent the release membar otherwise NULL.
-  //
-  // n.b. the input membar is expected to be either a volatile or
-  // acquire membar but in the former case must *not* be a card mark
-  // membar.
-  MemBarNode *trailing_to_leading(const MemBarNode *trailing)
-  {
-    assert((trailing->Opcode() == Op_MemBarAcquire ||
-	    trailing->Opcode() == Op_MemBarVolatile),
-	   "expecting an acquire or volatile membar");
-    assert((trailing->Opcode() != Op_MemBarVolatile ||
-	    !is_card_mark_membar(trailing)),
-	   "not expecting a card mark membar");
-    MemBarNode *leading = normal_to_leading(trailing);
-    if (leading) {
-      return leading;
-    }
-    // there is no normal path from trailing to leading membar. see if
-    // we can arrive via a card mark membar
-    MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
-    if (!card_mark_membar) {
-      return NULL;
-    }
-    return normal_to_leading(card_mark_membar);
-  }
-  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
+// predicates controlling emit of ldr<x>/ldar<x> and associated dmb
 bool unnecessary_acquire(const Node *barrier)
@@ -2588,40 +1309,19 @@
     return false;
-  // a volatile read derived from bytecode (or also from an inlined
-  // SHA field read via LibraryCallKit::load_field_from_object)
-  // manifests as a LoadX[mo_acquire] followed by an acquire membar
-  // with a bogus read dependency on its preceding load. so in those
-  // cases we will find the load node at the PARMS offset of the
-  // acquire membar.  n.b. there may be an intervening DecodeN node.
-  Node *x = barrier->lookup(TypeFunc::Parms);
-  if (x) {
-    // we are starting from an acquire and it has a fake dependency
-    //
-    // need to check for
-    //
-    //   LoadX[mo_acquire]
-    //   {  |1   }
-    //   {DecodeN}
-    //      |Parms
-    //   MemBarAcquire*
-    //
-    // where * tags node we were passed
-    // and |k means input k
-    if (x->is_DecodeNarrowPtr()) {
-      x = x->in(1);
-    }
-    return (x->is_Load() && x->as_Load()->is_acquire());
+  MemBarNode* mb = barrier->as_MemBar();
+  if (mb->trailing_load()) {
+    return true;
-  // other option for unnecessary membar is that it is a trailing node
-  // belonging to a CAS
-  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
-  return leading != NULL;
+  if (mb->trailing_load_store()) {
+    Node* load_store = mb->in(MemBarNode::Precedent);
+    assert(load_store->is_LoadStore(), "unexpected graph shape");
+    return is_CAS(load_store->Opcode());
+  }
+  return false;
 bool needs_acquiring_load(const Node *n)
@@ -2634,45 +1334,7 @@
   LoadNode *ld = n->as_Load();
-  if (!ld->is_acquire()) {
-    return false;
-  }
-  // check if this load is feeding an acquire membar
-  //
-  //   LoadX[mo_acquire]
-  //   {  |1   }
-  //   {DecodeN}
-  //      |Parms
-  //   MemBarAcquire*
-  //
-  // where * tags node we were passed
-  // and |k means input k
-  Node *start = ld;
-  Node *mbacq = NULL;
-  // if we hit a DecodeNarrowPtr we reset the start node and restart
-  // the search through the outputs
- restart:
-  for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
-    Node *x = start->fast_out(i);
-    if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
-      mbacq = x;
-    } else if (!mbacq &&
-	       (x->is_DecodeNarrowPtr() ||
-		(x->is_Mach() && x->Opcode() == Op_DecodeN))) {
-      start = x;
-      goto restart;
-    }
-  }
-  if (mbacq) {
-    return true;
-  }
-  return false;
+  return ld->is_acquire();
 bool unnecessary_release(const Node *n)
@@ -2686,32 +1348,27 @@
     return false;
-  // if there is a dependent CPUOrder barrier then use that as the
-  // leading
   MemBarNode *barrier = n->as_MemBar();
-  // check for an intervening cpuorder membar
-  MemBarNode *b = child_membar(barrier);
-  if (b && b->Opcode() == Op_MemBarCPUOrder) {
-    // ok, so start the check from the dependent cpuorder barrier
-    barrier = b;
+  if (!barrier->leading()) {
+    return false;
+  } else {
+    Node* trailing = barrier->trailing_membar();
+    MemBarNode* trailing_mb = trailing->as_MemBar();
+    assert(trailing_mb->trailing(), "Not a trailing membar?");
+    assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
+    Node* mem = trailing_mb->in(MemBarNode::Precedent);
+    if (mem->is_Store()) {
+      assert(mem->as_Store()->is_release(), "");
+      assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
+      return true;
+    } else {
+      assert(mem->is_LoadStore(), "");
+      assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
+      return is_CAS(mem->Opcode());
+    }
-  // must start with a normal feed
-  MemBarNode *child_barrier = leading_to_normal(barrier);
-  if (!child_barrier) {
-    return false;
-  }
-  if (!is_card_mark_membar(child_barrier)) {
-    // this is the trailing membar and we are done
-    return true;
-  }
-  // must be sure this card mark feeds a trailing membar
-  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
-  return (trailing != NULL);
+  return false;
 bool unnecessary_volatile(const Node *n)
@@ -2724,17 +1381,18 @@
   MemBarNode *mbvol = n->as_MemBar();
-  // first we check if this is part of a card mark. if so then we have
-  // to generate a StoreLoad barrier
-  if (is_card_mark_membar(mbvol)) {
-      return false;
+  bool release = mbvol->trailing_store();
+  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
+#ifdef ASSERT
+  if (release) {
+    Node* leading = mbvol->leading_membar();
+    assert(leading->Opcode() == Op_MemBarRelease, "");
+    assert(leading->as_MemBar()->leading_store(), "");
+    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
-  // ok, if it's not a card mark then we still need to check if it is
-  // a trailing membar of a volatile put graph.
-  return (trailing_to_leading(mbvol) != NULL);
+  return release;
 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
@@ -2749,53 +1407,7 @@
   StoreNode *st = n->as_Store();
-  // the store must be marked as releasing
-  if (!st->is_release()) {
-    return false;
-  }
-  // the store must be fed by a membar
-  Node *x = st->lookup(StoreNode::Memory);
-  if (! x || !x->is_Proj()) {
-    return false;
-  }
-  ProjNode *proj = x->as_Proj();
-  x = proj->lookup(0);
-  if (!x || !x->is_MemBar()) {
-    return false;
-  }
-  MemBarNode *barrier = x->as_MemBar();
-  // if the barrier is a release membar or a cpuorder membar fed by a
-  // release membar then we need to check whether that forms part of a
-  // volatile put graph.
-  // reject invalid candidates
-  if (!leading_membar(barrier)) {
-    return false;
-  }
-  // does this lead a normal subgraph?
-  MemBarNode *mbvol = leading_to_normal(barrier);
-  if (!mbvol) {
-    return false;
-  }
-  // all done unless this is a card mark
-  if (!is_card_mark_membar(mbvol)) {
-    return true;
-  }
-  // we found a card mark -- just make sure we have a trailing barrier
-  return (card_mark_to_trailing(mbvol) != NULL);
+  return st->trailing_membar() != NULL;
 // predicate controlling translation of CAS
@@ -2809,48 +1421,9 @@
     return false;
-  // CAS nodes only ought to turn up in inlined unsafe CAS operations
-#ifdef ASSERT
-  LoadStoreNode *st = n->as_LoadStore();
-  // the store must be fed by a membar
-  Node *x = st->lookup(StoreNode::Memory);
-  assert (x && x->is_Proj(), "CAS not fed by memory proj!");
-  ProjNode *proj = x->as_Proj();
-  x = proj->lookup(0);
-  assert (x && x->is_MemBar(), "CAS not fed by membar!");
-  MemBarNode *barrier = x->as_MemBar();
-  // the barrier must be a cpuorder membar fed by a release membar
-  guarantee(barrier->Opcode() == Op_MemBarCPUOrder,
-            "CAS not fed by cpuorder membar!");
-  MemBarNode *b = parent_membar(barrier);
-  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
-	  "CAS not fed by cpuorder+release membar pair!");
-  // does this lead a normal subgraph?
-  MemBarNode *mbar = leading_to_normal(barrier);
-  guarantee(mbar != NULL, "CAS not embedded in normal graph!");
-  // if this is a card mark membar check we have a trailing acquire
-  if (is_card_mark_membar(mbar)) {
-    mbar = card_mark_to_trailing(mbar);
-  }
-  guarantee(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");
-  guarantee(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
-#endif // ASSERT
+  LoadStoreNode* ldst = n->as_LoadStore();
+  assert(ldst->trailing_membar() != NULL, "expected trailing membar");
   // so we can just return true here
   return true;
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Wed Aug 22 15:10:40 2018 +0800
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Tue Aug 14 16:54:47 2018 +0200
@@ -119,10 +119,11 @@
 class C2AccessFence: public StackObj {
   C2Access& _access;
+  Node* _leading_membar;
   C2AccessFence(C2Access& access) :
-    _access(access) {
+    _access(access), _leading_membar(NULL) {
     GraphKit* kit = access.kit();
     DecoratorSet decorators = access.decorators();
@@ -139,12 +140,12 @@
       // into actual barriers on most machines, but we still need rest of
       // compiler to respect ordering.
       if (is_release) {
-        kit->insert_mem_bar(Op_MemBarRelease);
+        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
       } else if (is_volatile) {
         if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-          kit->insert_mem_bar(Op_MemBarVolatile);
+          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
         } else {
-          kit->insert_mem_bar(Op_MemBarRelease);
+          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
     } else if (is_write) {
@@ -152,7 +153,7 @@
       // floating down past the volatile write.  Also prevents commoning
       // another volatile read.
       if (is_volatile || is_release) {
-        kit->insert_mem_bar(Op_MemBarRelease);
+        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
     } else {
       // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -161,7 +162,7 @@
       // so there's no problems making a strong assert about mixing users
       // of safe & unsafe memory.
       if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
-        kit->insert_mem_bar(Op_MemBarVolatile);
+        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
@@ -196,20 +197,30 @@
     if (is_atomic) {
       if (is_acquire || is_volatile) {
-        kit->insert_mem_bar(Op_MemBarAcquire);
+        Node* n = _access.raw_access();
+        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
+        if (_leading_membar != NULL) {
+          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
+        }
     } else if (is_write) {
       // If not multiple copy atomic, we do the MemBarVolatile before the load.
       if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
-        kit->insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+        Node* n = _access.raw_access();
+        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
+        if (_leading_membar != NULL) {
+          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
+        }
     } else {
       if (is_volatile || is_acquire) {
-        kit->insert_mem_bar(Op_MemBarAcquire, _access.raw_access());
+        Node* n = _access.raw_access();
+        assert(_leading_membar == NULL || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
+        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
+        mb->as_MemBar()->set_trailing_load();
 Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
--- a/src/hotspot/share/opto/compile.cpp	Wed Aug 22 15:10:40 2018 +0800
+++ b/src/hotspot/share/opto/compile.cpp	Tue Aug 14 16:54:47 2018 +0200
@@ -2767,6 +2767,17 @@
             "raw memory operations should have control edge");
+  if (n->is_MemBar()) {
+    MemBarNode* mb = n->as_MemBar();
+    if (mb->trailing_store() || mb->trailing_load_store()) {
+      assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair");
+      Node* mem = mb->in(MemBarNode::Precedent);
+      assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) ||
+             (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op");
+    } else if (mb->leading()) {
+      assert(mb->trailing_membar()->leading_membar() == mb, "bad membar pair");
+    }
+  }
   // Count FPU ops and common calls, implements item (3)
   switch( nop ) {
--- a/src/hotspot/share/opto/memnode.cpp	Wed Aug 22 15:10:40 2018 +0800
+++ b/src/hotspot/share/opto/memnode.cpp	Tue Aug 14 16:54:47 2018 +0200
@@ -2521,45 +2521,63 @@
   Node* adr = in(MemNode::Address);
   Node* val = in(MemNode::ValueIn);
+  Node* result = this;
   // Load then Store?  Then the Store is useless
   if (val->is_Load() &&
       val->in(MemNode::Address)->eqv_uncast(adr) &&
       val->in(MemNode::Memory )->eqv_uncast(mem) &&
       val->as_Load()->store_Opcode() == Opcode()) {
-    return mem;
+    result = mem;
   // Two stores in a row of the same value?
-  if (mem->is_Store() &&
+  if (result == this &&
+      mem->is_Store() &&
       mem->in(MemNode::Address)->eqv_uncast(adr) &&
       mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
       mem->Opcode() == Opcode()) {
-    return mem;
+    result = mem;
   // Store of zero anywhere into a freshly-allocated object?
   // Then the store is useless.
   // (It must already have been captured by the InitializeNode.)
-  if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
+  if (result == this &&
+      ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
     // a newly allocated object is already all-zeroes everywhere
     if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
-      return mem;
+      result = mem;
-    // the store may also apply to zero-bits in an earlier object
-    Node* prev_mem = find_previous_store(phase);
-    // Steps (a), (b):  Walk past independent stores to find an exact match.
-    if (prev_mem != NULL) {
-      Node* prev_val = can_see_stored_value(prev_mem, phase);
-      if (prev_val != NULL && phase->eqv(prev_val, val)) {
-        // prev_val and val might differ by a cast; it would be good
-        // to keep the more informative of the two.
-        return mem;
+    if (result == this) {
+      // the store may also apply to zero-bits in an earlier object
+      Node* prev_mem = find_previous_store(phase);
+      // Steps (a), (b):  Walk past independent stores to find an exact match.
+      if (prev_mem != NULL) {
+        Node* prev_val = can_see_stored_value(prev_mem, phase);
+        if (prev_val != NULL && phase->eqv(prev_val, val)) {
+          // prev_val and val might differ by a cast; it would be good
+          // to keep the more informative of the two.
+          result = mem;
+        }
-  return this;
+  if (result != this && phase->is_IterGVN() != NULL) {
+    MemBarNode* trailing = trailing_membar();
+    if (trailing != NULL) {
+#ifdef ASSERT
+      const TypeOopPtr* t_oop = phase->type(in(Address))->isa_oopptr();
+      assert(t_oop == NULL || t_oop->is_known_instance_field(), "only for non escaping objects");
+      PhaseIterGVN* igvn = phase->is_IterGVN();
+      trailing->remove(igvn);
+    }
+  }
+  return result;
@@ -2637,6 +2655,33 @@
   return true;
+MemBarNode* StoreNode::trailing_membar() const {
+  if (is_release()) {
+    MemBarNode* trailing_mb = NULL;
+    for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+      Node* u = fast_out(i);
+      if (u->is_MemBar()) {
+        if (u->as_MemBar()->trailing_store()) {
+          assert(u->Opcode() == Op_MemBarVolatile, "");
+          assert(trailing_mb == NULL, "only one");
+          trailing_mb = u->as_MemBar();
+#ifdef ASSERT
+          Node* leading = u->as_MemBar()->leading_membar();
+          assert(leading->Opcode() == Op_MemBarRelease, "incorrect membar");
+          assert(leading->as_MemBar()->leading_store(), "incorrect membar pair");
+          assert(leading->as_MemBar()->trailing_membar() == u, "incorrect membar pair");
+        } else {
+          assert(u->as_MemBar()->standalone(), "");
+        }
+      }
+    }
+    return trailing_mb;
+  }
+  return NULL;
 // If the store is from an AND mask that leaves the low bits untouched, then
@@ -2749,6 +2794,30 @@
   return true;
+MemBarNode* LoadStoreNode::trailing_membar() const {
+  MemBarNode* trailing = NULL;
+  for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+    Node* u = fast_out(i);
+    if (u->is_MemBar()) {
+      if (u->as_MemBar()->trailing_load_store()) {
+        assert(u->Opcode() == Op_MemBarAcquire, "");
+        assert(trailing == NULL, "only one");
+        trailing = u->as_MemBar();
+#ifdef ASSERT
+        Node* leading = trailing->leading_membar();
+        assert(support_IRIW_for_not_multiple_copy_atomic_cpu || leading->Opcode() == Op_MemBarRelease, "incorrect membar");
+        assert(leading->as_MemBar()->leading_load_store(), "incorrect membar pair");
+        assert(leading->as_MemBar()->trailing_membar() == trailing, "incorrect membar pair");
+      } else {
+        assert(u->as_MemBar()->standalone(), "wrong barrier kind");
+      }
+    }
+  }
+  return trailing;
 uint LoadStoreNode::size_of() const { return sizeof(*this); }
@@ -2934,7 +3003,10 @@
 MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
   : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
-    _adr_type(C->get_adr_type(alias_idx))
+    _adr_type(C->get_adr_type(alias_idx)), _kind(Standalone)
+#ifdef ASSERT
+  , _pair_idx(0)
   Node* top = C->top();
@@ -2969,6 +3041,21 @@
+void MemBarNode::remove(PhaseIterGVN *igvn) {
+  if (outcnt() != 2) {
+    return;
+  }
+  if (trailing_store() || trailing_load_store()) {
+    MemBarNode* leading = leading_membar();
+    if (leading != NULL) {
+      assert(leading->trailing_membar() == this, "inconsistent leading/trailing membars");
+      leading->remove(igvn);
+    }
+  }
+  igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
+  igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
 // Return a node which is more "ideal" than the current node.  Strip out
 // control copies
@@ -3035,8 +3122,7 @@
     if (eliminate) {
       // Replace MemBar projections by its inputs.
       PhaseIterGVN* igvn = phase->is_IterGVN();
-      igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
-      igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
+      remove(igvn);
       // Must return either the original node (now dead) or a new node
       // (Do not return a top here, since that would break the uniqueness of top.)
       return new ConINode(TypeInt::ZERO);
@@ -3065,6 +3151,98 @@
   return NULL;
+// Tag two membars as the leading and trailing halves of a store pair.
+// In ASSERT builds both halves record the leading membar's _idx in
+// _pair_idx so that the pairing can be cross-checked later.
+void MemBarNode::set_store_pair(MemBarNode* leading, MemBarNode* trailing) {
+  trailing->_kind = TrailingStore;
+  leading->_kind = LeadingStore;
+#ifdef ASSERT
+  trailing->_pair_idx = leading->_idx;
+  leading->_pair_idx = leading->_idx;
+#endif
+}
+// Tag two membars as the leading and trailing halves of a load-store pair.
+// In ASSERT builds both halves record the leading membar's _idx in
+// _pair_idx so that the pairing can be cross-checked later.
+void MemBarNode::set_load_store_pair(MemBarNode* leading, MemBarNode* trailing) {
+  trailing->_kind = TrailingLoadStore;
+  leading->_kind = LeadingLoadStore;
+#ifdef ASSERT
+  trailing->_pair_idx = leading->_idx;
+  leading->_pair_idx = leading->_idx;
+#endif
+}
+// Starting at this membar, follow CFG outputs downwards until a membar whose
+// kind is one of the trailing kinds is reached, and return it. Returns NULL
+// if a node is visited twice before such a membar is found. Asserts that the
+// result's kind and _pair_idx match this (leading) membar.
+MemBarNode* MemBarNode::trailing_membar() const {
+  Node* trailing = (Node*)this;
+  VectorSet seen(Thread::current()->resource_area());
+  while (!trailing->is_MemBar() || !trailing->as_MemBar()->trailing()) {
+    if (seen.test_set(trailing->_idx)) {
+      // Dying subgraph?
+      return NULL;
+    }
+    // Step to the first CFG output other than the node itself. If there is
+    // none, trailing stays put and the seen check above ends the walk.
+    for (DUIterator_Fast jmax, j = trailing->fast_outs(jmax); j < jmax; j++) {
+      Node* next = trailing->fast_out(j);
+      if (next != trailing && next->is_CFG()) {
+        trailing = next;
+        break;
+      }
+    }
+  }
+  MemBarNode* mb = trailing->as_MemBar();
+  assert((mb->_kind == TrailingStore && _kind == LeadingStore) ||
+         (mb->_kind == TrailingLoadStore && _kind == LeadingLoadStore), "bad trailing membar");
+  assert(mb->_pair_idx == _pair_idx, "bad trailing membar");
+  return mb;
+}
+// Starting at in(0), follow control inputs upwards until a membar whose kind
+// is one of the leading kinds is reached, and return it. At a Region only
+// in(1) is followed. Returns NULL if the walk runs off the graph or visits a
+// node twice. Asserts that the result's kind and _pair_idx match this
+// (trailing) membar.
+MemBarNode* MemBarNode::leading_membar() const {
+  VectorSet seen(Thread::current()->resource_area());
+  Node* leading = in(0);
+  while (leading != NULL && (!leading->is_MemBar() || !leading->as_MemBar()->leading())) {
+    if (seen.test_set(leading->_idx)) {
+      // Dying subgraph?
+      return NULL;
+    }
+    if (leading->is_Region()) {
+      leading = leading->in(1);
+    } else {
+      leading = leading->in(0);
+    }
+  }
+#ifdef ASSERT
+  // Debug-only cross check: walk upwards through every non-top input of each
+  // Region (not just in(1)) and verify that the only leading membar that can
+  // be reached is the one found above.
+  Unique_Node_List wq;
+  wq.push((Node*)this);
+  uint found = 0;
+  for (uint i = 0; i < wq.size(); i++) {
+    Node* n = wq.at(i);
+    if (n->is_Region()) {
+      for (uint j = 1; j < n->req(); j++) {
+        Node* in = n->in(j);
+        if (in != NULL && !in->is_top()) {
+          wq.push(in);
+        }
+      }
+    } else {
+      if (n->is_MemBar() && n->as_MemBar()->leading()) {
+        assert(n == leading, "consistency check failed");
+        found++;
+      } else {
+        Node* in = n->in(0);
+        if (in != NULL && !in->is_top()) {
+          wq.push(in);
+        }
+      }
+    }
+  }
+  assert(found == 1 || (found == 0 && leading == NULL), "consistency check failed");
+#endif
+  if (leading == NULL) {
+    return NULL;
+  }
+  MemBarNode* mb = leading->as_MemBar();
+  assert((mb->_kind == LeadingStore && _kind == TrailingStore) ||
+         (mb->_kind == LeadingLoadStore && _kind == TrailingLoadStore), "bad leading membar");
+  assert(mb->_pair_idx == _pair_idx, "bad leading membar");
+  return mb;
+}
 // This node acts as a memory barrier on raw memory, after some raw stores.
--- a/src/hotspot/share/opto/memnode.hpp	Wed Aug 22 15:10:40 2018 +0800
+++ b/src/hotspot/share/opto/memnode.hpp	Tue Aug 14 16:54:47 2018 +0200
@@ -607,6 +607,8 @@
   // have all possible loads of the value stored been optimized away?
   bool value_never_loaded(PhaseTransform *phase) const;
+  MemBarNode* trailing_membar() const;  // NOTE(review): definition is not in this hunk; presumably the membar trailing this access - confirm
@@ -816,6 +818,7 @@
   virtual const class TypePtr *adr_type() const { return _adr_type; }  // returns bottom_type of address
   bool result_not_used() const;
+  MemBarNode* trailing_membar() const;  // NOTE(review): definition is not in this hunk; presumably the membar trailing this access - confirm
 class LoadStoreConditionalNode : public LoadStoreNode {
@@ -1142,6 +1145,20 @@
   // Memory type this node is serializing.  Usually either rawptr or bottom.
   const TypePtr* _adr_type;
+  // How is this membar related to a nearby memory access?
+  // Standalone is the value the constructor assigns; the Leading*/Trailing*
+  // values are assigned by set_trailing_load(), set_store_pair() and
+  // set_load_store_pair().
+  enum {
+    Standalone,
+    TrailingLoad,
+    TrailingStore,
+    LeadingStore,
+    TrailingLoadStore,
+    LeadingLoadStore
+  } _kind;
+#ifdef ASSERT
+  // Debug-only: _idx of the leading membar of the pair this membar belongs
+  // to (0 when constructed); compared when looking up the other half.
+  uint _pair_idx;
+#endif
   enum {
     Precedent = TypeFunc::Parms  // optional edge to force precedence
@@ -1159,6 +1176,24 @@
   static MemBarNode* make(Compile* C, int opcode,
                           int alias_idx = Compile::AliasIdxBot,
                           Node* precedent = NULL);
+  MemBarNode* trailing_membar() const;  // for a leading membar: the matching trailing membar, or NULL
+  MemBarNode* leading_membar() const;  // for a trailing membar: the matching leading membar, or NULL
+  void set_trailing_load() { _kind = TrailingLoad; }
+  bool trailing_load() const { return _kind == TrailingLoad; }
+  bool trailing_store() const { return _kind == TrailingStore; }
+  bool leading_store() const { return _kind == LeadingStore; }
+  bool trailing_load_store() const { return _kind == TrailingLoadStore; }
+  bool leading_load_store() const { return _kind == LeadingLoadStore; }
+  bool trailing() const { return _kind == TrailingLoad || _kind == TrailingStore || _kind == TrailingLoadStore; }  // any trailing kind
+  bool leading() const { return _kind == LeadingStore || _kind == LeadingLoadStore; }  // any leading kind
+  bool standalone() const { return _kind == Standalone; }  // not tagged as part of a pair or as a trailing load
+  static void set_store_pair(MemBarNode* leading, MemBarNode* trailing);  // tags both membars as a store pair
+  static void set_load_store_pair(MemBarNode* leading, MemBarNode* trailing);  // tags both membars as a load-store pair
+  void remove(PhaseIterGVN *igvn);  // rewires projection users to this membar's inputs
 // "Acquire" - no following ref can move before (but earlier refs can