# HG changeset patch # User lana # Date 1441322042 25200 # Node ID 6675700073c14bab9c25a306d5ed73f995dc1500 # Parent eb1661ea942c998ea214ce0f727622ebaec94f0c# Parent ab79437fbfaad3acd3ba87e85c23afbcda28c71e Merge diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/Address.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/Address.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/Address.java Thu Sep 03 16:14:02 2015 -0700 @@ -209,4 +209,7 @@ returns the result as an Address. Returns null if the result was zero. */ public Address xorWithMask(long mask) throws UnsupportedOperationException; + + // return address as long integer. + public long asLongValue(); } diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/BsdAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/BsdAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/BsdAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -288,7 +288,7 @@ return new BsdAddress(debugger, value); } - + public long asLongValue() { return addr; } //-------------------------------------------------------------------------------- // Internals only below this point // diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/dummy/DummyAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/dummy/DummyAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/dummy/DummyAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -275,6 +275,7 @@ return new DummyAddress(debugger, value); } + public long asLongValue() { return addr; } //-------------------------------------------------------------------------------- // Internals only below this point // diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -288,6 +288,7 @@ return new LinuxAddress(debugger, value); } + public long asLongValue() { return addr; } //-------------------------------------------------------------------------------- // Internals only below this point diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -283,7 +283,7 @@ return new ProcAddress(debugger, value); } - + public long asLongValue() { return addr; } //-------------------------------------------------------------------------------- // Internals only below this point // diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -281,7 +281,7 @@ return new RemoteAddress(debugger, value); } - + public long asLongValue() { return 
addr; } //-------------------------------------------------------------------------------- // Internals only below this point // diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgAddress.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgAddress.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgAddress.java Thu Sep 03 16:14:02 2015 -0700 @@ -292,6 +292,7 @@ return new WindbgAddress(debugger, value); } + public long asLongValue() { return addr; } //-------------------------------------------------------------------------------- // Internals only below this point diff -r eb1661ea942c -r 6675700073c1 hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/Symbol.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/Symbol.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/Symbol.java Thu Sep 03 16:14:02 2015 -0700 @@ -80,10 +80,19 @@ public byte getByteAt(long index) { return addr.getJByteAt(baseOffset + index); } - + // _identity_hash is a short private static CIntegerField idHash; - public int identityHash() { return (int)idHash.getValue(this.addr); } + public int identityHash() { + long addr_value = getAddress().asLongValue(); + int addr_bits = (int)(addr_value >> (VM.getVM().getLogMinObjAlignmentInBytes() + 3)); + int length = (int)getLength(); + int byte0 = getByteAt(0); + int byte1 = getByteAt(1); + int id_hash = (int)(0xffff & idHash.getValue(this.addr)); + return id_hash | + ((addr_bits ^ (length << 8) ^ ((byte0 << 8) | byte1)) << 16); + } public boolean equals(byte[] modUTF8Chars) { int l = (int) getLength(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/aarch64.ad --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Thu Sep 03 16:14:02 2015 -0700 @@ -1033,27 +1033,39 @@ }; // graph traversal helpers - MemBarNode *has_parent_membar(const Node *n, - ProjNode *&ctl, ProjNode *&mem); - MemBarNode *has_child_membar(const MemBarNode *n, - ProjNode *&ctl, ProjNode *&mem); + + MemBarNode *parent_membar(const Node *n); + MemBarNode *child_membar(const MemBarNode *n); + bool leading_membar(const MemBarNode *barrier); + + bool is_card_mark_membar(const MemBarNode *barrier); + + MemBarNode *leading_to_normal(MemBarNode *leading); + MemBarNode *normal_to_leading(const MemBarNode *barrier); + MemBarNode *card_mark_to_trailing(const MemBarNode *barrier); + MemBarNode *trailing_to_card_mark(const MemBarNode *trailing); + MemBarNode *trailing_to_leading(const MemBarNode *trailing); // predicates controlling emit of ldr/ldar and associated dmb + bool unnecessary_acquire(const Node *barrier); bool needs_acquiring_load(const Node *load); // predicates controlling emit of str/stlr and associated dmbs + bool unnecessary_release(const Node *barrier); bool unnecessary_volatile(const Node *barrier); bool needs_releasing_store(const Node *store); - // Use barrier instructions for unsafe volatile gets rather than - // trying to identify an exact signature for them - const bool UseBarriersForUnsafeVolatileGet = false; + // predicate controlling translation of StoreCM + bool unnecessary_storestore(const Node *storecm); %} source %{ + // Optimizaton of volatile gets and puts + // ------------------------------------- + // // AArch64 has ldar and stlr instructions which we can safely // use to implement volatile reads and writes. 
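As an illustrative aside (not part of this changeset): the two translation strategies contrasted in this comment block can be sketched with a couple of MacroAssembler calls. The helper names and register parameters below are hypothetical; only the ldr/ldar/membar calls themselves are taken from code that appears elsewhere in this patch.

#include "asm/macroAssembler.hpp"

// Fallback translation, used when UseBarriersForVolatile is set or when the
// graph does not match the volatile signatures described below: a plain load
// followed by an acquiring dmb.
static void volatile_load_with_dmb(MacroAssembler* masm, Register dst, Register base) {
  masm->ldr(dst, Address(base));                            // ldr dst, [base]
  masm->membar(Assembler::LoadLoad|Assembler::LoadStore);   // dmb ish (acquire)
}

// Optimised translation, used when the acquire signature is matched: a single
// acquiring load and no dmb at all.
static void volatile_load_with_ldar(MacroAssembler* masm, Register dst, Register base) {
  masm->ldar(dst, base);                                    // ldar dst, [base]
}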
For a volatile read // we simply need @@ -1102,15 +1114,19 @@ // A volatile write is translated to the node sequence // // MemBarRelease - // StoreX[mo_release] + // StoreX[mo_release] {CardMark}-optional // MemBarVolatile // // n.b. the above node patterns are generated with a strict // 'signature' configuration of input and output dependencies (see - // the predicates below for exact details). The two signatures are - // unique to translated volatile reads/stores -- they will not - // appear as a result of any other bytecode translation or inlining - // nor as a consequence of optimizing transforms. + // the predicates below for exact details). The card mark may be as + // simple as a few extra nodes or, in a few GC configurations, may + // include more complex control flow between the leading and + // trailing memory barriers. However, whatever the card mark + // configuration these signatures are unique to translated volatile + // reads/stores -- they will not appear as a result of any other + // bytecode translation or inlining nor as a consequence of + // optimizing transforms. // // We also want to catch inlined unsafe volatile gets and puts and // be able to implement them using either ldar/stlr or some @@ -1122,7 +1138,7 @@ // // MemBarRelease // MemBarCPUOrder - // StoreX[mo_release] + // StoreX[mo_release] {CardMark}-optional // MemBarVolatile // // n.b. as an aside, the cpuorder membar is not itself subject to @@ -1130,7 +1146,7 @@ // predicates need to detect its presence in order to correctly // select the desired adlc rules. // - // Inlined unsafe volatiles gets manifest as a somewhat different + // Inlined unsafe volatile gets manifest as a somewhat different // node sequence to a normal volatile get // // MemBarCPUOrder @@ -1173,33 +1189,22 @@ // n.b. the translation rules below which rely on detection of the // volatile signatures and insert ldar or stlr are failsafe. // If we see anything other than the signature configurations we - // always just translate the loads and stors to ldr and str + // always just translate the loads and stores to ldr and str // and translate acquire, release and volatile membars to the // relevant dmb instructions. // - // n.b.b as a case in point for the above comment, the current - // predicates don't detect the precise signature for certain types - // of volatile object stores (where the heap_base input type is not - // known at compile-time to be non-NULL). In those cases the - // MemBarRelease and MemBarVolatile bracket an if-then-else sequence - // with a store in each branch (we need a different store depending - // on whether heap_base is actually NULL). In such a case we will - // just plant a dmb both before and after the branch/merge. The - // predicate could (and probably should) be fixed later to also - // detect this case. - - // graph traversal helpers + + // graph traversal helpers used for volatile put/get optimization + + // 1) general purpose helpers // if node n is linked to a parent MemBarNode by an intervening - // Control or Memory ProjNode return the MemBarNode otherwise return + // Control and Memory ProjNode return the MemBarNode otherwise return // NULL. // // n may only be a Load or a MemBar. - // - // The ProjNode* references c and m are used to return the relevant - // nodes. 
- - MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m) + + MemBarNode *parent_membar(const Node *n) { Node *ctl = NULL; Node *mem = NULL; @@ -1218,15 +1223,11 @@ if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) return NULL; - c = ctl->as_Proj(); - membar = ctl->lookup(0); if (!membar || !membar->is_MemBar()) return NULL; - m = mem->as_Proj(); - if (mem->lookup(0) != membar) return NULL; @@ -1235,12 +1236,8 @@ // if n is linked to a child MemBarNode by intervening Control and // Memory ProjNodes return the MemBarNode otherwise return NULL. - // - // The ProjNode** arguments c and m are used to return pointers to - // the relevant nodes. A null argument means don't don't return a - // value. - - MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m) + + MemBarNode *child_membar(const MemBarNode *n) { ProjNode *ctl = n->proj_out(TypeFunc::Control); ProjNode *mem = n->proj_out(TypeFunc::Memory); @@ -1249,9 +1246,6 @@ if (! ctl || ! mem) return NULL; - c = ctl; - m = mem; - MemBarNode *child = NULL; Node *x; @@ -1279,9 +1273,838 @@ return NULL; } + // helper predicate use to filter candidates for a leading memory + // barrier + // + // returns true if barrier is a MemBarRelease or a MemBarCPUOrder + // whose Ctl and Mem feeds come from a MemBarRelease otherwise false + + bool leading_membar(const MemBarNode *barrier) + { + int opcode = barrier->Opcode(); + // if this is a release membar we are ok + if (opcode == Op_MemBarRelease) + return true; + // if its a cpuorder membar . . . + if (opcode != Op_MemBarCPUOrder) + return false; + // then the parent has to be a release membar + MemBarNode *parent = parent_membar(barrier); + if (!parent) + return false; + opcode = parent->Opcode(); + return opcode == Op_MemBarRelease; + } + + // 2) card mark detection helper + + // helper predicate which can be used to detect a volatile membar + // introduced as part of a conditional card mark sequence either by + // G1 or by CMS when UseCondCardMark is true. + // + // membar can be definitively determined to be part of a card mark + // sequence if and only if all the following hold + // + // i) it is a MemBarVolatile + // + // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is + // true + // + // iii) the node's Mem projection feeds a StoreCM node. + + bool is_card_mark_membar(const MemBarNode *barrier) + { + if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) + return false; + + if (barrier->Opcode() != Op_MemBarVolatile) + return false; + + ProjNode *mem = barrier->proj_out(TypeFunc::Memory); + + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) { + Node *y = mem->fast_out(i); + if (y->Opcode() == Op_StoreCM) { + return true; + } + } + + return false; + } + + + // 3) helper predicates to traverse volatile put graphs which may + // contain GC barrier subgraphs + + // Preamble + // -------- + // + // for volatile writes we can omit generating barriers and employ a + // releasing store when we see a node sequence sequence with a + // leading MemBarRelease and a trailing MemBarVolatile as follows + // + // MemBarRelease + // { || } -- optional + // {MemBarCPUOrder} + // || \\ + // || StoreX[mo_release] + // | \ / + // | MergeMem + // | / + // MemBarVolatile + // + // where + // || and \\ represent Ctl and Mem feeds via Proj nodes + // | \ and / indicate further routing of the Ctl and Mem feeds + // + // this is the graph we see for non-object stores. 
however, for a + // volatile Object store (StoreN/P) we may see other nodes below the + // leading membar because of the need for a GC pre- or post-write + // barrier. + // + // with most GC configurations we with see this simple variant which + // includes a post-write barrier card mark. + // + // MemBarRelease______________________________ + // || \\ Ctl \ \\ + // || StoreN/P[mo_release] CastP2X StoreB/CM + // | \ / . . . / + // | MergeMem + // | / + // || / + // MemBarVolatile + // + // i.e. the leading membar feeds Ctl to a CastP2X (which converts + // the object address to an int used to compute the card offset) and + // Ctl+Mem to a StoreB node (which does the actual card mark). + // + // n.b. a StoreCM node will only appear in this configuration when + // using CMS. StoreCM differs from a normal card mark write (StoreB) + // because it implies a requirement to order visibility of the card + // mark (StoreCM) relative to the object put (StoreP/N) using a + // StoreStore memory barrier (arguably this ought to be represented + // explicitly in the ideal graph but that is not how it works). This + // ordering is required for both non-volatile and volatile + // puts. Normally that means we need to translate a StoreCM using + // the sequence + // + // dmb ishst + // stlrb + // + // However, in the case of a volatile put if we can recognise this + // configuration and plant an stlr for the object write then we can + // omit the dmb and just plant an strb since visibility of the stlr + // is ordered before visibility of subsequent stores. StoreCM nodes + // also arise when using G1 or using CMS with conditional card + // marking. In these cases (as we shall see) we don't need to insert + // the dmb when translating StoreCM because there is already an + // intervening StoreLoad barrier between it and the StoreP/N. + // + // It is also possible to perform the card mark conditionally on it + // currently being unmarked in which case the volatile put graph + // will look slightly different + // + // MemBarRelease + // MemBarCPUOrder___________________________________________ + // || \\ Ctl \ Ctl \ \\ Mem \ + // || StoreN/P[mo_release] CastP2X If LoadB | + // | \ / \ | + // | MergeMem . . . StoreB + // | / / + // || / + // MemBarVolatile + // + // It is worth noting at this stage that both the above + // configurations can be uniquely identified by checking that the + // memory flow includes the following subgraph: + // + // MemBarRelease + // MemBarCPUOrder + // | \ . . . + // | StoreX[mo_release] . . . + // | / + // MergeMem + // | + // MemBarVolatile + // + // This is referred to as a *normal* subgraph. It can easily be + // detected starting from any candidate MemBarRelease, + // StoreX[mo_release] or MemBarVolatile. + // + // the code below uses two helper predicates, leading_to_normal and + // normal_to_leading to identify this configuration, one validating + // the layout starting from the top membar and searching down and + // the other validating the layout starting from the lower membar + // and searching up. + // + // There are two special case GC configurations when a normal graph + // may not be generated: when using G1 (which always employs a + // conditional card mark); and when using CMS with conditional card + // marking configured. These GCs are both concurrent rather than + // stop-the world GCs. 
So they introduce extra Ctl+Mem flow into the + // graph between the leading and trailing membar nodes, in + // particular enforcing stronger memory serialisation beween the + // object put and the corresponding conditional card mark. CMS + // employs a post-write GC barrier while G1 employs both a pre- and + // post-write GC barrier. Of course the extra nodes may be absent -- + // they are only inserted for object puts. This significantly + // complicates the task of identifying whether a MemBarRelease, + // StoreX[mo_release] or MemBarVolatile forms part of a volatile put + // when using these GC configurations (see below). + // + // In both cases the post-write subtree includes an auxiliary + // MemBarVolatile (StoreLoad barrier) separating the object put and + // the read of the corresponding card. This poses two additional + // problems. + // + // Firstly, a card mark MemBarVolatile needs to be distinguished + // from a normal trailing MemBarVolatile. Resolving this first + // problem is straightforward: a card mark MemBarVolatile always + // projects a Mem feed to a StoreCM node and that is a unique marker + // + // MemBarVolatile (card mark) + // C | \ . . . + // | StoreCM . . . + // . . . + // + // The second problem is how the code generator is to translate the + // card mark barrier? It always needs to be translated to a "dmb + // ish" instruction whether or not it occurs as part of a volatile + // put. A StoreLoad barrier is needed after the object put to ensure + // i) visibility to GC threads of the object put and ii) visibility + // to the mutator thread of any card clearing write by a GC + // thread. Clearly a normal store (str) will not guarantee this + // ordering but neither will a releasing store (stlr). The latter + // guarantees that the object put is visible but does not guarantee + // that writes by other threads have also been observed. + // + // So, returning to the task of translating the object put and the + // leading/trailing membar nodes: what do the non-normal node graph + // look like for these 2 special cases? and how can we determine the + // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile + // in both normal and non-normal cases? + // + // A CMS GC post-barrier wraps its card write (StoreCM) inside an If + // which selects conditonal execution based on the value loaded + // (LoadB) from the card. Ctl and Mem are fed to the If via an + // intervening StoreLoad barrier (MemBarVolatile). + // + // So, with CMS we may see a node graph which looks like this + // + // MemBarRelease + // MemBarCPUOrder_(leading)__________________ + // C | M \ \\ C \ + // | \ StoreN/P[mo_release] CastP2X + // | Bot \ / + // | MergeMem + // | / + // MemBarVolatile (card mark) + // C | || M | + // | LoadB | + // | | | + // | Cmp |\ + // | / | \ + // If | \ + // | \ | \ + // IfFalse IfTrue | \ + // \ / \ | \ + // \ / StoreCM | + // \ / | | + // Region . . . | + // | \ / + // | . . . \ / Bot + // | MergeMem + // | | + // MemBarVolatile (trailing) + // + // The first MergeMem merges the AliasIdxBot Mem slice from the + // leading membar and the oopptr Mem slice from the Store into the + // card mark membar. The trailing MergeMem merges the AliasIdxBot + // Mem slice from the card mark membar and the AliasIdxRaw slice + // from the StoreCM into the trailing membar (n.b. the latter + // proceeds via a Phi associated with the If region). + // + // G1 is quite a lot more complicated. 
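Before looking at the G1 shapes in detail, here is a rough source-level rendering of the conditional card mark that the CMS graph above encodes and which also forms the kernel of the G1 post-write barrier. This is an editorial sketch only: jbyte_t, dirty_card, card_for and storeload_barrier are local placeholders, not HotSpot types or functions, and the comments map each line to the ideal-graph nodes discussed here.

#include <atomic>

typedef signed char jbyte_t;                  // local stand-in for jbyte
static const jbyte_t dirty_card = 0;          // placeholder dirty value
static jbyte_t fake_card_table[1] = { 1 };    // placeholder card table

static jbyte_t* card_for(void* obj) {         // stands in for the CastP2X +
  (void)obj;                                  // shift card-offset arithmetic
  return &fake_card_table[0];
}

static void storeload_barrier() {             // the card mark MemBarVolatile,
  std::atomic_thread_fence(std::memory_order_seq_cst);  // emitted as dmb ish
}

static void volatile_oop_store_with_card_mark(void* volatile* field, void* val, void* obj) {
  *field = val;                    // StoreN/P[mo_release], fed by the leading membar
  storeload_barrier();             // StoreLoad barrier between the put and the card read
  jbyte_t* card = card_for(obj);
  if (*card != dirty_card) {       // LoadB + Cmp + If
    *card = dirty_card;            // the conditional StoreCM/StoreB
  }
  // a trailing MemBarVolatile then closes the volatile put subgraph
}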
The nodes inserted on behalf + // of G1 may comprise: a pre-write graph which adds the old value to + // the SATB queue; the releasing store itself; and, finally, a + // post-write graph which performs a card mark. + // + // The pre-write graph may be omitted, but only when the put is + // writing to a newly allocated (young gen) object and then only if + // there is a direct memory chain to the Initialize node for the + // object allocation. This will not happen for a volatile put since + // any memory chain passes through the leading membar. + // + // The pre-write graph includes a series of 3 If tests. The outermost + // If tests whether SATB is enabled (no else case). The next If tests + // whether the old value is non-NULL (no else case). The third tests + // whether the SATB queue index is > 0, if so updating the queue. The + // else case for this third If calls out to the runtime to allocate a + // new queue buffer. + // + // So with G1 the pre-write and releasing store subgraph looks like + // this (the nested Ifs are omitted). + // + // MemBarRelease (leading)____________ + // C | || M \ M \ M \ M \ . . . + // | LoadB \ LoadL LoadN \ + // | / \ \ + // If |\ \ + // | \ | \ \ + // IfFalse IfTrue | \ \ + // | | | \ | + // | If | /\ | + // | | \ | + // | \ | + // | . . . \ | + // | / | / | | + // Region Phi[M] | | + // | \ | | | + // | \_____ | ___ | | + // C | C \ | C \ M | | + // | CastP2X | StoreN/P[mo_release] | + // | | | | + // C | M | M | M | + // \ | | / + // . . . + // (post write subtree elided) + // . . . + // C \ M / + // MemBarVolatile (trailing) + // + // n.b. the LoadB in this subgraph is not the card read -- it's a + // read of the SATB queue active flag. + // + // The G1 post-write subtree is also optional, this time when the + // new value being written is either null or can be identified as a + // newly allocated (young gen) object with no intervening control + // flow. The latter cannot happen but the former may, in which case + // the card mark membar is omitted and the memory feeds from the + // leading membar and the StoreN/P are merged direct into the + // trailing membar as per the normal subgraph. So, the only special + // case which arises is when the post-write subgraph is generated. + // + // The kernel of the post-write G1 subgraph is the card mark itself + // which includes a card mark memory barrier (MemBarVolatile), a + // card test (LoadB), and a conditional update (If feeding a + // StoreCM). These nodes are surrounded by a series of nested Ifs + // which try to avoid doing the card mark. The top level If skips if + // the object reference does not cross regions (i.e. it tests if + // (adr ^ val) >> log2(regsize) != 0) -- intra-region references + // need not be recorded. The next If, which skips on a NULL value, + // may be absent (it is not generated if the type of value is >= + // OopPtr::NotNull). The 3rd If skips writes to young regions (by + // checking if card_val != young). n.b. although this test requires + // a pre-read of the card it can safely be done before the StoreLoad + // barrier. However that does not bypass the need to reread the card + // after the barrier. + // + // (pre-write subtree elided) + // . . . . . . . . . . . . + // C | M | M | M | + // Region Phi[M] StoreN | + // | / \ | | + // / \_______ / \ | | + // C / C \ . . . \ | | + // If CastP2X . . . | | | + // / \ | | | + // / \ | | | + // IfFalse IfTrue | | | + // | | | | /| + // | If | | / | + // | / \ | | / | + // | / \ \ | / | + // | IfFalse IfTrue MergeMem | + // | . . . 
/ \ / | + // | / \ / | + // | IfFalse IfTrue / | + // | . . . | / | + // | If / | + // | / \ / | + // | / \ / | + // | IfFalse IfTrue / | + // | . . . | / | + // | \ / | + // | \ / | + // | MemBarVolatile__(card mark) | + // | || C | M \ M \ | + // | LoadB If | | | + // | / \ | | | + // | . . . | | | + // | \ | | / + // | StoreCM | / + // | . . . | / + // | _________/ / + // | / _____________/ + // | . . . . . . | / / + // | | | / _________/ + // | | Phi[M] / / + // | | | / / + // | | | / / + // | Region . . . Phi[M] _____/ + // | / | / + // | | / + // | . . . . . . | / + // | / | / + // Region | | Phi[M] + // | | | / Bot + // \ MergeMem + // \ / + // MemBarVolatile + // + // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice + // from the leading membar and the oopptr Mem slice from the Store + // into the card mark membar i.e. the memory flow to the card mark + // membar still looks like a normal graph. + // + // The trailing MergeMem merges an AliasIdxBot Mem slice with other + // Mem slices (from the StoreCM and other card mark queue stores). + // However in this case the AliasIdxBot Mem slice does not come + // direct from the card mark membar. It is merged through a series + // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow + // from the leading membar with the Mem feed from the card mark + // membar. Each Phi corresponds to one of the Ifs which may skip + // around the card mark membar. So when the If implementing the NULL + // value check has been elided the total number of Phis is 2 + // otherwise it is 3. + // + // So, the upshot is that in all cases the volatile put graph will + // include a *normal* memory subgraph betwen the leading membar and + // its child membar. When that child is not a card mark membar then + // it marks the end of a volatile put subgraph. If the child is a + // card mark membar then the normal subgraph will form part of a + // volatile put subgraph if and only if the child feeds an + // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That + // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging + // the leading barrier memory flow (for G1). + // + // The predicates controlling generation of instructions for store + // and barrier nodes employ a few simple helper functions (described + // below) which identify the presence or absence of these subgraph + // configurations and provide a means of traversing from one node in + // the subgraph to another. + + // leading_to_normal + // + //graph traversal helper which detects the normal case Mem feed + // from a release membar (or, optionally, its cpuorder child) to a + // dependent volatile membar i.e. it ensures that the following Mem + // flow subgraph is present. + // + // MemBarRelease + // MemBarCPUOrder + // | \ . . . + // | StoreN/P[mo_release] . . . + // | / + // MergeMem + // | + // MemBarVolatile + // + // if the correct configuration is present returns the volatile + // membar otherwise NULL. + // + // the input membar is expected to be either a cpuorder membar or a + // release membar. in the latter case it should not have a cpu membar + // child. + // + // the returned membar may be a card mark membar rather than a + // trailing membar. 
+ + MemBarNode *leading_to_normal(MemBarNode *leading) + { + assert((leading->Opcode() == Op_MemBarRelease || + leading->Opcode() == Op_MemBarCPUOrder), + "expecting a volatile or cpuroder membar!"); + + // check the mem flow + ProjNode *mem = leading->proj_out(TypeFunc::Memory); + + if (!mem) + return NULL; + + Node *x = NULL; + StoreNode * st = NULL; + MergeMemNode *mm = NULL; + + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (x->is_MergeMem()) { + if (mm != NULL) + return NULL; + // two merge mems is one too many + mm = x->as_MergeMem(); + } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { + // two releasing stores is one too many + if (st != NULL) + return NULL; + st = x->as_Store(); + } + } + + if (!mm || !st) + return NULL; + + bool found = false; + // ensure the store feeds the merge + for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { + if (st->fast_out(i) == mm) { + found = true; + break; + } + } + + if (!found) + return NULL; + + MemBarNode *mbvol = NULL; + // ensure the merge feeds a volatile membar + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { + mbvol = x->as_MemBar(); + break; + } + } + + return mbvol; + } + + // normal_to_leading + // + // graph traversal helper which detects the normal case Mem feed + // from either a card mark or a trailing membar to a preceding + // release membar (optionally its cpuorder child) i.e. it ensures + // that the following Mem flow subgraph is present. + // + // MemBarRelease + // MemBarCPUOrder {leading} + // | \ . . . + // | StoreN/P[mo_release] . . . + // | / + // MergeMem + // | + // MemBarVolatile + // + // this predicate checks for the same flow as the previous predicate + // but starting from the bottom rather than the top. + // + // if the configuration is present returns the cpuorder member for + // preference or when absent the release membar otherwise NULL. + // + // n.b. the input membar is expected to be a MemBarVolatile but + // need not be a card mark membar. 
+ + MemBarNode *normal_to_leading(const MemBarNode *barrier) + { + // input must be a volatile membar + assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar"); + Node *x; + + // the Mem feed to the membar should be a merge + x = barrier->in(TypeFunc::Memory); + if (!x->is_MergeMem()) + return NULL; + + MergeMemNode *mm = x->as_MergeMem(); + + // the AliasIdxBot slice should be another MemBar projection + x = mm->in(Compile::AliasIdxBot); + // ensure this is a non control projection + if (!x->is_Proj() || x->is_CFG()) + return NULL; + // if it is fed by a membar that's the one we want + x = x->in(0); + + if (!x->is_MemBar()) + return NULL; + + MemBarNode *leading = x->as_MemBar(); + // reject invalid candidates + if (!leading_membar(leading)) + return NULL; + + // ok, we have a leading ReleaseMembar, now for the sanity clauses + + // the leading membar must feed Mem to a releasing store + ProjNode *mem = leading->proj_out(TypeFunc::Memory); + StoreNode *st = NULL; + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { + st = x->as_Store(); + break; + } + } + if (st == NULL) + return NULL; + + // the releasing store has to feed the same merge + for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { + if (st->fast_out(i) == mm) + return leading; + } + + return NULL; + } + + // card_mark_to_trailing + // + // graph traversal helper which detects extra, non-normal Mem feed + // from a card mark volatile membar to a trailing membar i.e. it + // ensures that one of the following three GC post-write Mem flow + // subgraphs is present. + // + // 1) + // . . . + // | + // MemBarVolatile (card mark) + // | | + // | StoreCM + // | | + // | . . . + // Bot | / + // MergeMem + // | + // MemBarVolatile (trailing) + // + // + // 2) + // MemBarRelease/CPUOrder (leading) + // | + // | + // |\ . . . + // | \ | + // | \ MemBarVolatile (card mark) + // | \ | | + // \ \ | StoreCM . . . + // \ \ | + // \ Phi + // \ / + // Phi . . . + // Bot | / + // MergeMem + // | + // MemBarVolatile (trailing) + // + // 3) + // MemBarRelease/CPUOrder (leading) + // | + // |\ + // | \ + // | \ . . . + // | \ | + // |\ \ MemBarVolatile (card mark) + // | \ \ | | + // | \ \ | StoreCM . . . + // | \ \ | + // \ \ Phi + // \ \ / + // \ Phi + // \ / + // Phi . . . + // Bot | / + // MergeMem + // | + // MemBarVolatile (trailing) + // + // configuration 1 is only valid if UseConcMarkSweepGC && + // UseCondCardMark + // + // configurations 2 and 3 are only valid if UseG1GC. + // + // if a valid configuration is present returns the trailing membar + // otherwise NULL. + // + // n.b. the supplied membar is expected to be a card mark + // MemBarVolatile i.e. 
the caller must ensure the input node has the + // correct operand and feeds Mem to a StoreCM node + + MemBarNode *card_mark_to_trailing(const MemBarNode *barrier) + { + // input must be a card mark volatile membar + assert(is_card_mark_membar(barrier), "expecting a card mark membar"); + + Node *feed = barrier->proj_out(TypeFunc::Memory); + Node *x; + MergeMemNode *mm = NULL; + + const int MAX_PHIS = 3; // max phis we will search through + int phicount = 0; // current search count + + bool retry_feed = true; + while (retry_feed) { + // see if we have a direct MergeMem feed + for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) { + x = feed->fast_out(i); + // the correct Phi will be merging a Bot memory slice + if (x->is_MergeMem()) { + mm = x->as_MergeMem(); + break; + } + } + if (mm) { + retry_feed = false; + } else if (UseG1GC & phicount++ < MAX_PHIS) { + // the barrier may feed indirectly via one or two Phi nodes + PhiNode *phi = NULL; + for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) { + x = feed->fast_out(i); + // the correct Phi will be merging a Bot memory slice + if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) { + phi = x->as_Phi(); + break; + } + } + if (!phi) + return NULL; + // look for another merge below this phi + feed = phi; + } else { + // couldn't find a merge + return NULL; + } + } + + // sanity check this feed turns up as the expected slice + assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge"); + + MemBarNode *trailing = NULL; + // be sure we have a volatile membar below the merge + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { + trailing = x->as_MemBar(); + break; + } + } + + return trailing; + } + + // trailing_to_card_mark + // + // graph traversal helper which detects extra, non-normal Mem feed + // from a trailing membar to a preceding card mark volatile membar + // i.e. it identifies whether one of the three possible extra GC + // post-write Mem flow subgraphs is present + // + // this predicate checks for the same flow as the previous predicate + // but starting from the bottom rather than the top. 
+ // + // if the configuration is present returns the card mark membar + // otherwise NULL + + MemBarNode *trailing_to_card_mark(const MemBarNode *trailing) + { + assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); + + Node *x = trailing->in(TypeFunc::Memory); + // the Mem feed to the membar should be a merge + if (!x->is_MergeMem()) + return NULL; + + MergeMemNode *mm = x->as_MergeMem(); + + x = mm->in(Compile::AliasIdxBot); + // with G1 we may possibly see a Phi or two before we see a Memory + // Proj from the card mark membar + + const int MAX_PHIS = 3; // max phis we will search through + int phicount = 0; // current search count + + bool retry_feed = !x->is_Proj(); + + while (retry_feed) { + if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) { + PhiNode *phi = x->as_Phi(); + ProjNode *proj = NULL; + PhiNode *nextphi = NULL; + bool found_leading = false; + for (uint i = 1; i < phi->req(); i++) { + x = phi->in(i); + if (x->is_Phi()) { + nextphi = x->as_Phi(); + } else if (x->is_Proj()) { + int opcode = x->in(0)->Opcode(); + if (opcode == Op_MemBarVolatile) { + proj = x->as_Proj(); + } else if (opcode == Op_MemBarRelease || + opcode == Op_MemBarCPUOrder) { + // probably a leading membar + found_leading = true; + } + } + } + // if we found a correct looking proj then retry from there + // otherwise we must see a leading and a phi or this is the + // wrong config + if (proj != NULL) { + x = proj; + retry_feed = false; + } else if (found_leading && nextphi != NULL) { + // retry from this phi to check phi2 + x = nextphi; + } else { + // not what we were looking for + return NULL; + } + } else { + return NULL; + } + } + // the proj has to come from the card mark membar + x = x->in(0); + if (!x->is_MemBar()) + return NULL; + + MemBarNode *card_mark_membar = x->as_MemBar(); + + if (!is_card_mark_membar(card_mark_membar)) + return NULL; + + return card_mark_membar; + } + + // trailing_to_leading + // + // graph traversal helper which checks the Mem flow up the graph + // from a (non-card mark) volatile membar attempting to locate and + // return an associated leading membar. it first looks for a + // subgraph in the normal configuration (relying on helper + // normal_to_leading). failing that it then looks for one of the + // possible post-write card mark subgraphs linking the trailing node + // to the card mark membar (relying on helper + // trailing_to_card_mark), and then checks that the card mark membar + // is fed by a leading membar (once again relying on auxiliary + // predicate normal_to_leading). + // + // if the configuration is valid returns the cpuorder membar for + // preference or when absent the release membar otherwise NULL. + // + // n.b. the input membar is expected to be a volatile membar but + // must *not* be a card mark membar.
+ + MemBarNode *trailing_to_leading(const MemBarNode *trailing) + { + assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); + + MemBarNode *leading = normal_to_leading(trailing); + + if (leading) + return leading; + + MemBarNode *card_mark_membar = trailing_to_card_mark(trailing); + + if (!card_mark_membar) + return NULL; + + return normal_to_leading(card_mark_membar); + } + // predicates controlling emit of ldr/ldar and associated dmb -bool unnecessary_acquire(const Node *barrier) { +bool unnecessary_acquire(const Node *barrier) +{ // assert barrier->is_MemBar(); if (UseBarriersForVolatile) // we need to plant a dmb @@ -1323,13 +2146,11 @@ return (x->is_Load() && x->as_Load()->is_acquire()); } - // only continue if we want to try to match unsafe volatile gets - if (UseBarriersForUnsafeVolatileGet) - return false; + // now check for an unsafe volatile get // need to check for // - // MemBarCPUOrder + // MemBarCPUOrder // || \\ // MemBarAcquire* LoadX[mo_acquire] // || @@ -1341,9 +2162,13 @@ // check for a parent MemBarCPUOrder ProjNode *ctl; ProjNode *mem; - MemBarNode *parent = has_parent_membar(barrier, ctl, mem); + MemBarNode *parent = parent_membar(barrier); if (!parent || parent->Opcode() != Op_MemBarCPUOrder) return false; + ctl = parent->proj_out(TypeFunc::Control); + mem = parent->proj_out(TypeFunc::Memory); + if (!ctl || !mem) + return false; // ensure the proj nodes both feed a LoadX[mo_acquire] LoadNode *ld = NULL; for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { @@ -1369,7 +2194,7 @@ if (ld) return false; // check for a child cpuorder membar - MemBarNode *child = has_child_membar(barrier->as_MemBar(), ctl, mem); + MemBarNode *child = child_membar(barrier->as_MemBar()); if (!child || child->Opcode() != Op_MemBarCPUOrder) return false; @@ -1422,9 +2247,7 @@ return true; } - // only continue if we want to try to match unsafe volatile gets - if (UseBarriersForUnsafeVolatileGet) - return false; + // now check for an unsafe volatile get // check if Ctl and Proj feed comes from a MemBarCPUOrder // @@ -1435,22 +2258,20 @@ // MemBarCPUOrder MemBarNode *membar; - ProjNode *ctl; - ProjNode *mem; - - membar = has_parent_membar(ld, ctl, mem); + + membar = parent_membar(ld); if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) return false; // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain - membar = has_child_membar(membar, ctl, mem); + membar = child_membar(membar); if (!membar || !membar->Opcode() == Op_MemBarAcquire) return false; - membar = has_child_membar(membar, ctl, mem); + membar = child_membar(membar); if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) return false; @@ -1458,194 +2279,81 @@ return true; } -bool unnecessary_release(const Node *n) { +bool unnecessary_release(const Node *n) +{ + assert((n->is_MemBar() && + n->Opcode() == Op_MemBarRelease), + "expecting a release membar"); + + if (UseBarriersForVolatile) + // we need to plant a dmb + return false; + + // if there is a dependent CPUOrder barrier then use that as the + // leading + + MemBarNode *barrier = n->as_MemBar(); + // check for an intervening cpuorder membar + MemBarNode *b = child_membar(barrier); + if (b && b->Opcode() == Op_MemBarCPUOrder) { + // ok, so start the check from the dependent cpuorder barrier + barrier = b; + } + + // must start with a normal feed + MemBarNode *child_barrier = leading_to_normal(barrier); + + if (!child_barrier) + return false; + + if (!is_card_mark_membar(child_barrier)) + // this is the trailing membar and we are 
done + return true; + + // must be sure this card mark feeds a trailing membar + MemBarNode *trailing = card_mark_to_trailing(child_barrier); + return (trailing != NULL); +} + +bool unnecessary_volatile(const Node *n) +{ // assert n->is_MemBar(); if (UseBarriersForVolatile) // we need to plant a dmb return false; - // ok, so we can omit this release barrier if it has been inserted - // as part of a volatile store sequence - // - // MemBarRelease - // { || } - // {MemBarCPUOrder} -- optional - // || \\ - // || StoreX[mo_release] - // | \ / - // | MergeMem - // | / - // MemBarVolatile - // - // where - // || and \\ represent Ctl and Mem feeds via Proj nodes - // | \ and / indicate further routing of the Ctl and Mem feeds - // - // so we need to check that - // - // ia) the release membar (or its dependent cpuorder membar) feeds - // control to a store node (via a Control project node) - // - // ii) the store is ordered release - // - // iii) the release membar (or its dependent cpuorder membar) feeds - // control to a volatile membar (via the same Control project node) - // - // iv) the release membar feeds memory to a merge mem and to the - // same store (both via a single Memory proj node) - // - // v) the store outputs to the merge mem - // - // vi) the merge mem outputs to the same volatile membar - // - // n.b. if this is an inlined unsafe node then the release membar - // may feed its control and memory links via an intervening cpuorder - // membar. this case can be dealt with when we check the release - // membar projections. if they both feed a single cpuorder membar - // node continue to make the same checks as above but with the - // cpuorder membar substituted for the release membar. if they don't - // both feed a cpuorder membar then the check fails. - // - // n.b.b. for an inlined unsafe store of an object in the case where - // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see - // an embedded if then else where we expect the store. this is - // needed to do the right type of store depending on whether - // heap_base is NULL. We could check for that but for now we can - // just take the hit of on inserting a redundant dmb for this - // redundant volatile membar - - MemBarNode *barrier = n->as_MemBar(); - ProjNode *ctl; - ProjNode *mem; - // check for an intervening cpuorder membar - MemBarNode *b = has_child_membar(barrier, ctl, mem); - if (b && b->Opcode() == Op_MemBarCPUOrder) { - // ok, so start form the dependent cpuorder barrier - barrier = b; - } - // check the ctl and mem flow - ctl = barrier->proj_out(TypeFunc::Control); - mem = barrier->proj_out(TypeFunc::Memory); - - // the barrier needs to have both a Ctl and Mem projection - if (! ctl || ! mem) + MemBarNode *mbvol = n->as_MemBar(); + + // first we check if this is part of a card mark. if so then we have + // to generate a StoreLoad barrier + + if (is_card_mark_membar(mbvol)) + return false; + + // ok, if it's not a card mark then we still need to check if it is + // a trailing membar of a volatile put hgraph. + + return (trailing_to_leading(mbvol) != NULL); +} + +// predicates controlling emit of str/stlr and associated dmbs + +bool needs_releasing_store(const Node *n) +{ + // assert n->is_Store(); + if (UseBarriersForVolatile) + // we use a normal store and dmb combination return false; - Node *x = NULL; - Node *mbvol = NULL; - StoreNode * st = NULL; - - // For a normal volatile write the Ctl ProjNode should have output - // to a MemBarVolatile and a Store marked as releasing - // - // n.b. 
for an inlined unsafe store of an object in the case where - // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see - // an embedded if then else where we expect the store. this is - // needed to do the right type of store depending on whether - // heap_base is NULL. We could check for that case too but for now - // we can just take the hit of inserting a dmb and a non-volatile - // store to implement the volatile store - - for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { - x = ctl->fast_out(i); - if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { - if (mbvol) { - return false; - } - mbvol = x; - } else if (x->is_Store()) { - st = x->as_Store(); - if (! st->is_release()) { - return false; - } - } else if (!x->is_Mach()) { - // we may see mach nodes added during matching but nothing else - return false; - } - } - - if (!mbvol || !st) - return false; - - // the Mem ProjNode should output to a MergeMem and the same Store - Node *mm = NULL; - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - x = mem->fast_out(i); - if (!mm && x->is_MergeMem()) { - mm = x; - } else if (x != st && !x->is_Mach()) { - // we may see mach nodes added during matching but nothing else - return false; - } - } - - if (!mm) + StoreNode *st = n->as_Store(); + + // the store must be marked as releasing + if (!st->is_release()) return false; - // the MergeMem should output to the MemBarVolatile - for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { - x = mm->fast_out(i); - if (x != mbvol && !x->is_Mach()) { - // we may see mach nodes added during matching but nothing else - return false; - } - } - - return true; -} - -bool unnecessary_volatile(const Node *n) { - // assert n->is_MemBar(); - if (UseBarriersForVolatile) - // we need to plant a dmb - return false; - - // ok, so we can omit this volatile barrier if it has been inserted - // as part of a volatile store sequence - // - // MemBarRelease - // { || } - // {MemBarCPUOrder} -- optional - // || \\ - // || StoreX[mo_release] - // | \ / - // | MergeMem - // | / - // MemBarVolatile - // - // where - // || and \\ represent Ctl and Mem feeds via Proj nodes - // | \ and / indicate further routing of the Ctl and Mem feeds - // - // we need to check that - // - // i) the volatile membar gets its control feed from a release - // membar (or its dependent cpuorder membar) via a Control project - // node - // - // ii) the release membar (or its dependent cpuorder membar) also - // feeds control to a store node via the same proj node - // - // iii) the store is ordered release - // - // iv) the release membar (or its dependent cpuorder membar) feeds - // memory to a merge mem and to the same store (both via a single - // Memory proj node) - // - // v) the store outputs to the merge mem - // - // vi) the merge mem outputs to the volatile membar - // - // n.b. for an inlined unsafe store of an object in the case where - // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see - // an embedded if then else where we expect the store. this is - // needed to do the right type of store depending on whether - // heap_base is NULL. We could check for that but for now we can - // just take the hit of on inserting a redundant dmb for this - // redundant volatile membar - - MemBarNode *mbvol = n->as_MemBar(); - Node *x = n->lookup(TypeFunc::Control); + // the store must be fed by a membar + + Node *x = st->lookup(StoreNode::Memory); if (! 
x || !x->is_Proj()) return false; @@ -1659,200 +2367,78 @@ MemBarNode *barrier = x->as_MemBar(); - // if the barrier is a release membar we have what we want. if it is - // a cpuorder membar then we need to ensure that it is fed by a - // release membar in which case we proceed to check the graph below - // this cpuorder membar as the feed - - if (x->Opcode() != Op_MemBarRelease) { - if (x->Opcode() != Op_MemBarCPUOrder) - return false; - ProjNode *ctl; - ProjNode *mem; - MemBarNode *b = has_parent_membar(x, ctl, mem); - if (!b || !b->Opcode() == Op_MemBarRelease) - return false; - } - - ProjNode *ctl = barrier->proj_out(TypeFunc::Control); - ProjNode *mem = barrier->proj_out(TypeFunc::Memory); - - // barrier needs to have both a Ctl and Mem projection - // and we need to have reached it via the Ctl projection - if (! ctl || ! mem || ctl != proj) - return false; - - StoreNode * st = NULL; - - // The Ctl ProjNode should have output to a MemBarVolatile and - // a Store marked as releasing - for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { - x = ctl->fast_out(i); - if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { - if (x != mbvol) { - return false; - } - } else if (x->is_Store()) { - st = x->as_Store(); - if (! st->is_release()) { - return false; - } - } else if (!x->is_Mach()){ - // we may see mach nodes added during matching but nothing else - return false; - } - } - - if (!st) - return false; - - // the Mem ProjNode should output to a MergeMem and the same Store - Node *mm = NULL; - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - x = mem->fast_out(i); - if (!mm && x->is_MergeMem()) { - mm = x; - } else if (x != st && !x->is_Mach()) { - // we may see mach nodes added during matching but nothing else - return false; - } - } - - if (!mm) - return false; - - // the MergeMem should output to the MemBarVolatile - for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { - x = mm->fast_out(i); - if (x != mbvol && !x->is_Mach()) { - // we may see mach nodes added during matching but nothing else - return false; - } - } - - return true; -} - - - -bool needs_releasing_store(const Node *n) -{ - // assert n->is_Store(); - if (UseBarriersForVolatile) - // we use a normal store and dmb combination + // if the barrier is a release membar or a cpuorder mmebar fed by a + // release membar then we need to check whether that forms part of a + // volatile put graph. + + // reject invalid candidates + if (!leading_membar(barrier)) return false; - StoreNode *st = n->as_Store(); - - if (!st->is_release()) - return false; - - // check if this store is bracketed by a release (or its dependent - // cpuorder membar) and a volatile membar - // - // MemBarRelease - // { || } - // {MemBarCPUOrder} -- optional - // || \\ - // || StoreX[mo_release] - // | \ / - // | MergeMem - // | / - // MemBarVolatile - // - // where - // || and \\ represent Ctl and Mem feeds via Proj nodes - // | \ and / indicate further routing of the Ctl and Mem feeds - // - - - Node *x = st->lookup(TypeFunc::Control); - - if (! x || !x->is_Proj()) - return false; - - ProjNode *proj = x->as_Proj(); - - x = proj->lookup(0); - - if (!x || !x->is_MemBar()) - return false; - - MemBarNode *barrier = x->as_MemBar(); - - // if the barrier is a release membar we have what we want. 
if it is - // a cpuorder membar then we need to ensure that it is fed by a - // release membar in which case we proceed to check the graph below - // this cpuorder membar as the feed - - if (x->Opcode() != Op_MemBarRelease) { - if (x->Opcode() != Op_MemBarCPUOrder) - return false; - Node *ctl = x->lookup(TypeFunc::Control); - Node *mem = x->lookup(TypeFunc::Memory); - if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj()) - return false; - x = ctl->lookup(0); - if (!x || !x->is_MemBar() || !x->Opcode() == Op_MemBarRelease) - return false; - Node *y = mem->lookup(0); - if (!y || y != x) - return false; - } - - ProjNode *ctl = barrier->proj_out(TypeFunc::Control); - ProjNode *mem = barrier->proj_out(TypeFunc::Memory); - - // MemBarRelease needs to have both a Ctl and Mem projection - // and we need to have reached it via the Ctl projection - if (! ctl || ! mem || ctl != proj) - return false; - - MemBarNode *mbvol = NULL; - - // The Ctl ProjNode should have output to a MemBarVolatile and - // a Store marked as releasing - for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { - x = ctl->fast_out(i); - if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { - mbvol = x->as_MemBar(); - } else if (x->is_Store()) { - if (x != st) { - return false; - } - } else if (!x->is_Mach()){ - return false; - } - } + // does this lead a normal subgraph? + MemBarNode *mbvol = leading_to_normal(barrier); if (!mbvol) return false; - // the Mem ProjNode should output to a MergeMem and the same Store - Node *mm = NULL; - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - x = mem->fast_out(i); - if (!mm && x->is_MergeMem()) { - mm = x; - } else if (x != st && !x->is_Mach()) { - return false; - } - } - - if (!mm) + // all done unless this is a card mark + if (!is_card_mark_membar(mbvol)) + return true; + + // we found a card mark -- just make sure we have a trailing barrier + + return (card_mark_to_trailing(mbvol) != NULL); +} + +// predicate controlling translation of StoreCM +// +// returns true if a StoreStore must precede the card write otherwise +// false + +bool unnecessary_storestore(const Node *storecm) +{ + assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + + // we only ever need to generate a dmb ishst between an object put + // and the associated card mark when we are using CMS without + // conditional card marking + + if (!UseConcMarkSweepGC || UseCondCardMark) + return true; + + // if we are implementing volatile puts using barriers then the + // object put as an str so we must insert the dmb ishst + + if (UseBarriersForVolatile) return false; - // the MergeMem should output to the MemBarVolatile - for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { - x = mm->fast_out(i); - if (x != mbvol && !x->is_Mach()) { - return false; - } - } - - return true; -} - + // we can omit the dmb ishst if this StoreCM is part of a volatile + // put because in thta case the put will be implemented by stlr + // + // we need to check for a normal subgraph feeding this StoreCM. 
+ // that means the StoreCM must be fed Memory from a leading membar, + // either a MemBarRelease or its dependent MemBarCPUOrder, and the + // leading membar must be part of a normal subgraph + + Node *x = storecm->in(StoreNode::Memory); + + if (!x->is_Proj()) + return false; + + x = x->in(0); + + if (!x->is_MemBar()) + return false; + + MemBarNode *leading = x->as_MemBar(); + + // reject invalid candidates + if (!leading_membar(leading)) + return false; + + // we can omit the StoreStore if it is the head of a normal subgraph + return (leading_to_normal(leading) != NULL); +} #define __ _masm. @@ -2944,6 +3530,13 @@ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_strb0_ordered(memory mem) %{ + MacroAssembler _masm(&cbuf); + __ membar(Assembler::StoreStore); + loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_strh(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(), @@ -6613,6 +7206,7 @@ instruct storeimmCM0(immI0 zero, memory mem) %{ match(Set mem (StoreCM mem zero)); + predicate(unnecessary_storestore(n)); ins_cost(INSN_COST); format %{ "strb zr, $mem\t# byte" %} @@ -6622,6 +7216,21 @@ ins_pipe(istore_mem); %} +// Store CMS card-mark Immediate with intervening StoreStore +// needed when using CMS with no conditional card marking +instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); + + ins_cost(INSN_COST * 2); + format %{ "dmb ishst" + "\n\tstrb zr, $mem\t# byte" %} + + ins_encode(aarch64_enc_strb0_ordered(mem)); + + ins_pipe(istore_mem); +%} + // Store Byte instruct storeB(iRegIorL2I src, memory mem) %{ @@ -6643,7 +7252,7 @@ predicate(!needs_releasing_store(n)); ins_cost(INSN_COST); - format %{ "strb zr, $mem\t# byte" %} + format %{ "strb rscractch2, $mem\t# byte" %} ins_encode(aarch64_enc_strb0(mem)); @@ -7396,6 +8005,7 @@ format %{ "membar_acquire" %} ins_encode %{ + __ block_comment("membar_acquire"); __ membar(Assembler::LoadLoad|Assembler::LoadStore); %} @@ -7448,6 +8058,7 @@ format %{ "membar_release" %} ins_encode %{ + __ block_comment("membar_release"); __ membar(Assembler::LoadStore|Assembler::StoreStore); %} ins_pipe(pipe_serial); @@ -7499,6 +8110,7 @@ format %{ "membar_volatile" %} ins_encode %{ + __ block_comment("membar_volatile"); __ membar(Assembler::StoreLoad); %} @@ -9429,7 +10041,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9465,7 +10077,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9501,7 +10113,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9537,7 +10149,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9573,7 +10185,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9609,7 +10221,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} 
ins_pipe(ialu_reg_reg_shift); @@ -9645,7 +10257,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9681,7 +10293,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9717,7 +10329,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9754,7 +10366,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9792,7 +10404,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9830,7 +10442,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9868,7 +10480,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9906,7 +10518,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9944,7 +10556,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -9982,7 +10594,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10020,7 +10632,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10058,7 +10670,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10096,7 +10708,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10134,7 +10746,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10172,7 +10784,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10210,7 +10822,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10248,7 +10860,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::ASR, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); @@ -10286,7 +10898,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::LSL, - $src3$$constant & 0x3f); + $src3$$constant & 0x1f); %} ins_pipe(ialu_reg_reg_shift); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/aarch64_ad.m4 --- a/hotspot/src/cpu/aarch64/vm/aarch64_ad.m4 Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/aarch64_ad.m4 Thu Sep 03 16:14:02 2015 -0700 @@ -42,7 +42,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::$5, - $src3$$constant & 0x3f); + 
$src3$$constant & ifelse($1,I,0x1f,0x3f)); %} ins_pipe(ialu_reg_reg_shift); @@ -87,7 +87,7 @@ as_Register($src1$$reg), as_Register($src2$$reg), Assembler::$5, - $src3$$constant & 0x3f); + $src3$$constant & ifelse($1,I,0x1f,0x3f)); %} ins_pipe(ialu_reg_reg_shift); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -268,7 +268,7 @@ __ ldar(r21, r28); // ldar x21, [x28] // LoadStoreExclusiveOp - __ stxrw(r24, r24, r7); // stxr w24, w24, [x7] + __ stxrw(r21, r24, r7); // stxr w21, w24, [x7] __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28] __ ldxrw(r21, r6); // ldxr w21, [x6] __ ldaxrw(r15, r30); // ldaxr w15, [x30] @@ -299,7 +299,7 @@ // LoadStoreExclusiveOp __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] - __ ldaxpw(r14, r14, r15); // ldaxp w14, w14, [x15] + __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15] __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10] __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18] @@ -773,7 +773,7 @@ 260: c85fffbb ldaxr x27, [x29] 264: c89fffa0 stlr x0, [x29] 268: c8dfff95 ldar x21, [x28] - 26c: 88187cf8 stxr w24, w24, [x7] + 26c: 88157cf8 stxr w21, w24, [x7] 270: 8815ff9a stlxr w21, w26, [x28] 274: 885f7cd5 ldxr w21, [x6] 278: 885fffcf ldaxr w15, [x30] @@ -796,7 +796,7 @@ 2bc: c82870bb stxp w8, x27, x28, [x5] 2c0: c825b8c8 stlxp w5, x8, x14, [x6] 2c4: 887f12d9 ldxp w25, w4, [x22] - 2c8: 887fb9ee ldaxp w14, w14, [x15] + 2c8: 887fb9ed ldaxp w13, w14, [x15] 2cc: 8834215a stxp w20, w26, w8, [x10] 2d0: 8837ca52 stlxp w23, w18, w18, [x18] 2d4: f806317e str x30, [x11,#99] @@ -1085,13 +1085,13 @@ 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, - 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88187cf8, + 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8, 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, - 0xc825b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a, + 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a, 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3, diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -1106,13 +1106,13 @@ #define INSN4(NAME, sz, op, o0) /* Four registers */ \ void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ - assert(Rs != Rn, "unpredictable instruction"); \ + guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ } #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ - assert(Rs != Rn, "unpredictable instruction"); \ + guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ } @@ -1124,6 +1124,7 @@ #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ void NAME(Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rt1 
!= Rt2, "unpredictable instruction"); \ load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -611,6 +611,7 @@ Label done; const Register swap_reg = r0; + const Register tmp = c_rarg2; const Register obj_reg = c_rarg3; // Will contain the oop const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); @@ -624,7 +625,7 @@ ldr(obj_reg, Address(lock_reg, obj_offset)); if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case); + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); } // Load (object->mark() | 1) into swap_reg @@ -643,7 +644,7 @@ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); bind(fast); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); b(done); bind(fail); } else { @@ -671,7 +672,7 @@ if (PrintBiasedLockingStatistics) { br(Assembler::NE, slow_case); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); } br(Assembler::EQ, done); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -34,6 +34,7 @@ #include "memory/resourceArea.hpp" #include "nativeInst_aarch64.hpp" #include "oops/klass.inline.hpp" +#include "oops/oop.inline.hpp" #include "opto/compile.hpp" #include "opto/node.hpp" #include "runtime/biasedLocking.hpp" @@ -398,11 +399,7 @@ if (PrintBiasedLockingStatistics && counters == NULL) counters = BiasedLocking::counters(); - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - tmp_reg = rscratch2; - } - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1); + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); @@ -432,7 +429,7 @@ if (counters != NULL) { Label around; cbnz(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2); b(done); bind(around); } else { @@ -485,7 +482,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -511,7 +508,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -539,7 +536,7 @@ // removing the bias bit from the object's header. 
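The strengthened guarantee()s on the exclusive-access encoders and the extra tmp register threaded through atomic_incw() in the surrounding hunks enforce the same architectural rule: the status register written by stxr must be distinct from the data and the address registers, otherwise the instruction is unpredictable (the old code reused one register for both data and status). A standalone sketch of the corrected retry loop, using GCC inline assembly rather than HotSpot's MacroAssembler:

    // Sketch only: a 32-bit LL/SC increment in which the stxr status register
    // is kept separate from both the data register and the address register.
    #include <cstdint>
    #include <cstdio>

    uint32_t atomic_incw(volatile uint32_t* counter) {
    #if defined(__aarch64__)
      uint32_t value, status;
      asm volatile(
          "1: ldxr  %w0, [%2]      \n"   // load-exclusive the counter
          "   add   %w0, %w0, #1   \n"   // bump it
          "   stxr  %w1, %w0, [%2] \n"   // status lands in a different register
          "   cbnz  %w1, 1b        \n"   // retry if another writer got in first
          : "=&r"(value), "=&r"(status)
          : "r"(counter)
          : "memory");
      return value;
    #else
      return __atomic_add_fetch(counter, 1, __ATOMIC_RELAXED);  // portable fallback
    #endif
    }

    int main() {
      volatile uint32_t c = 41;
      std::printf("%u\n", atomic_incw(&c));  // prints 42
    }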
if (counters != NULL) { atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - rscratch1); + rscratch1, rscratch2); } bind(nope); } @@ -1640,15 +1637,15 @@ return Address(Rd); } -void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { Label retry_load; bind(retry_load); // flush and load exclusive from the memory location ldxrw(tmp, counter_addr); addw(tmp, tmp, 1); // if we store+flush with no intervening write tmp wil be zero - stxrw(tmp, tmp, counter_addr); - cbnzw(tmp, retry_load); + stxrw(tmp2, tmp, counter_addr); + cbnzw(tmp2, retry_load); } @@ -2021,6 +2018,14 @@ } } +void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + subw(Rd, Rn, decrement.as_register()); + } else { + subw(Rd, Rn, decrement.as_constant()); + } +} + void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { @@ -2110,7 +2115,7 @@ return a != b.as_register() && a != c && b.as_register() != c; } -#define ATOMIC_OP(LDXR, OP, STXR) \ +#define ATOMIC_OP(LDXR, OP, IOP, STXR) \ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \ Register result = rscratch2; \ if (prev->is_valid()) \ @@ -2120,14 +2125,15 @@ bind(retry_load); \ LDXR(result, addr); \ OP(rscratch1, result, incr); \ - STXR(rscratch1, rscratch1, addr); \ - cbnzw(rscratch1, retry_load); \ - if (prev->is_valid() && prev != result) \ - mov(prev, result); \ + STXR(rscratch2, rscratch1, addr); \ + cbnzw(rscratch2, retry_load); \ + if (prev->is_valid() && prev != result) { \ + IOP(prev, rscratch1, incr); \ + } \ } -ATOMIC_OP(ldxr, add, stxr) -ATOMIC_OP(ldxrw, addw, stxrw) +ATOMIC_OP(ldxr, add, sub, stxr) +ATOMIC_OP(ldxrw, addw, subw, stxrw) #undef ATOMIC_OP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -107,9 +107,7 @@ // Biased locking support // lock_reg and obj_reg must be loaded up with the appropriate values. // swap_reg is killed. - // tmp_reg is optional. If it is supplied (i.e., != noreg) it will - // be killed; if not supplied, push/pop will be used internally to - // allocate a temporary (inefficient, avoid if possible). + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 // Optional slow case is for implementations (interpreter and C1) which branch to // slow case directly. Leaves condition codes set for C2's Fast_Lock node. // Returns offset of first potentially-faulting instruction for null @@ -126,10 +124,10 @@ // Helper functions for statistics gathering. // Unconditional atomic increment. 
- void atomic_incw(Register counter_addr, Register tmp); - void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + void atomic_incw(Register counter_addr, Register tmp, Register tmp2); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) { lea(tmp1, counter_addr); - atomic_incw(tmp1, tmp2); + atomic_incw(tmp1, tmp2, tmp3); } // Load Effective Address void lea(Register r, const Address &a) { @@ -1057,6 +1055,7 @@ void add(Register Rd, Register Rn, RegisterOrConstant increment); void addw(Register Rd, Register Rn, RegisterOrConstant increment); void sub(Register Rd, Register Rn, RegisterOrConstant decrement); + void subw(Register Rd, Register Rn, RegisterOrConstant decrement); void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1774,6 +1774,7 @@ const Register obj_reg = r19; // Will contain the oop const Register lock_reg = r13; // Address of compiler lock object (BasicLock) const Register old_hdr = r13; // value of old header at unlock time + const Register tmp = c_rarg3; Label slow_path_lock; Label lock_done; @@ -1795,7 +1796,7 @@ __ ldr(obj_reg, Address(oop_handle_reg, 0)); if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock); + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); } // Load (object->mark() | 1) into swap_reg %r0 diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1913,15 +1913,18 @@ } void TemplateInterpreterGenerator::count_bytecode() { + Register rscratch3 = r0; __ push(rscratch1); __ push(rscratch2); + __ push(rscratch3); Label L; __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); __ bind(L); __ ldxr(rscratch1, rscratch2); __ add(rscratch1, rscratch1, 1); - __ stxr(rscratch1, rscratch1, rscratch2); - __ cbnzw(rscratch1, L); + __ stxr(rscratch3, rscratch1, rscratch2); + __ cbnzw(rscratch3, L); + __ pop(rscratch3); __ pop(rscratch2); __ pop(rscratch1); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/assembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1674,6 +1674,13 @@ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true); } +void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); @@ -6604,13 +6611,6 @@ emit_operand(dst, src); } -void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true); - emit_int8(0x2A); - emit_int8((unsigned char)(0xC0 | encode)); -} - void 
Assembler::cvtsi2ssq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); if (VM_Version::supports_evex()) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/interp_masm_x86.cpp --- a/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -355,8 +355,8 @@ case ctos: // fall through case stos: // fall through case itos: movl(rax, val_addr); break; - case ftos: movflt(xmm0, val_addr); break; - case dtos: movdbl(xmm0, val_addr); break; + case ftos: load_float(val_addr); break; + case dtos: load_double(val_addr); break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -376,8 +376,8 @@ case ctos: // fall through case stos: // fall through case itos: movl(rax, val_addr); break; - case ftos: fld_s(val_addr); break; - case dtos: fld_d(val_addr); break; + case ftos: load_float(val_addr); break; + case dtos: load_double(val_addr); break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -578,6 +578,26 @@ push(r); } +void InterpreterMacroAssembler::push_f(XMMRegister r) { + subptr(rsp, wordSize); + movflt(Address(rsp, 0), r); +} + +void InterpreterMacroAssembler::pop_f(XMMRegister r) { + movflt(r, Address(rsp, 0)); + addptr(rsp, wordSize); +} + +void InterpreterMacroAssembler::push_d(XMMRegister r) { + subptr(rsp, 2 * wordSize); + movdbl(Address(rsp, 0), r); +} + +void InterpreterMacroAssembler::pop_d(XMMRegister r) { + movdbl(r, Address(rsp, 0)); + addptr(rsp, 2 * Interpreter::stackElementSize); +} + #ifdef _LP64 void InterpreterMacroAssembler::pop_i(Register r) { // XXX can't use pop currently, upper half non clean @@ -590,31 +610,11 @@ addptr(rsp, 2 * Interpreter::stackElementSize); } -void InterpreterMacroAssembler::pop_f(XMMRegister r) { - movflt(r, Address(rsp, 0)); - addptr(rsp, wordSize); -} - -void InterpreterMacroAssembler::pop_d(XMMRegister r) { - movdbl(r, Address(rsp, 0)); - addptr(rsp, 2 * Interpreter::stackElementSize); -} - void InterpreterMacroAssembler::push_l(Register r) { subptr(rsp, 2 * wordSize); movq(Address(rsp, 0), r); } -void InterpreterMacroAssembler::push_f(XMMRegister r) { - subptr(rsp, wordSize); - movflt(Address(rsp, 0), r); -} - -void InterpreterMacroAssembler::push_d(XMMRegister r) { - subptr(rsp, 2 * wordSize); - movdbl(Address(rsp, 0), r); -} - void InterpreterMacroAssembler::pop(TosState state) { switch (state) { case atos: pop_ptr(); break; @@ -623,8 +623,8 @@ case stos: case itos: pop_i(); break; case ltos: pop_l(); break; - case ftos: pop_f(); break; - case dtos: pop_d(); break; + case ftos: pop_f(xmm0); break; + case dtos: pop_d(xmm0); break; case vtos: /* nothing to do */ break; default: ShouldNotReachHere(); } @@ -640,8 +640,8 @@ case stos: case itos: push_i(); break; case ltos: push_l(); break; - case ftos: push_f(); break; - case dtos: push_d(); break; + case ftos: push_f(xmm0); break; + case dtos: push_d(xmm0); break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -675,8 +675,20 @@ case stos: // fall through case itos: pop_i(rax); break; case ltos: pop_l(rax, rdx); break; - case ftos: pop_f(); break; - case dtos: pop_d(); break; + case ftos: + if (UseSSE >= 1) { + pop_f(xmm0); + } else { + pop_f(); + } + break; + case dtos: + if (UseSSE >= 2) { + pop_d(xmm0); + } else { + pop_d(); + } + break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -695,7 +707,7 @@ fstp_s(Address(rsp, 0)); } -void 
InterpreterMacroAssembler::push_d(Register r) { +void InterpreterMacroAssembler::push_d() { // Do not schedule for no AGI! Never write beyond rsp! subptr(rsp, 2 * wordSize); fstp_d(Address(rsp, 0)); @@ -711,8 +723,20 @@ case stos: // fall through case itos: push_i(rax); break; case ltos: push_l(rax, rdx); break; - case ftos: push_f(); break; - case dtos: push_d(rax); break; + case ftos: + if (UseSSE >= 1) { + push_f(xmm0); + } else { + push_f(); + } + break; + case dtos: + if (UseSSE >= 2) { + push_d(xmm0); + } else { + push_d(); + } + break; case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } @@ -995,22 +1019,6 @@ leave(); // remove frame anchor pop(ret_addr); // get return address mov(rsp, rbx); // set sp to sender sp -#ifndef _LP64 - if (UseSSE) { - // float and double are returned in xmm register in SSE-mode - if (state == ftos && UseSSE >= 1) { - subptr(rsp, wordSize); - fstp_s(Address(rsp, 0)); - movflt(xmm0, Address(rsp, 0)); - addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - subptr(rsp, 2*wordSize); - fstp_d(Address(rsp, 0)); - movdbl(xmm0, Address(rsp, 0)); - addptr(rsp, 2*wordSize); - } - } -#endif // _LP64 } #endif // !CC_INTERP @@ -1783,7 +1791,10 @@ void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { #ifndef _LP64 - if (state == ftos || state == dtos) MacroAssembler::verify_FPU(stack_depth); + if ((state == ftos && UseSSE < 1) || + (state == dtos && UseSSE < 2)) { + MacroAssembler::verify_FPU(stack_depth); + } #endif } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/interp_masm_x86.hpp --- a/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -140,20 +140,20 @@ void push_ptr(Register r = rax); void push_i(Register r = rax); + void push_f(XMMRegister r); + void pop_f(XMMRegister r); + void pop_d(XMMRegister r); + void push_d(XMMRegister r); #ifdef _LP64 void pop_l(Register r = rax); - void pop_f(XMMRegister r = xmm0); - void pop_d(XMMRegister r = xmm0); void push_l(Register r = rax); - void push_f(XMMRegister r = xmm0); - void push_d(XMMRegister r = xmm0); #else void pop_l(Register lo = rax, Register hi = rdx); void pop_f(); void pop_d(); void push_l(Register lo = rax, Register hi = rdx); - void push_d(Register r = rax); + void push_d(); void push_f(); #endif // _LP64 diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp --- a/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -42,6 +42,12 @@ address generate_Reference_get_entry(); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); +#ifndef _LP64 + address generate_Float_intBitsToFloat_entry(); + address generate_Float_floatToRawIntBits_entry(); + address generate_Double_longBitsToDouble_entry(); + address generate_Double_doubleToRawLongBits_entry(); +#endif void lock_method(void); void generate_stack_overflow_check(void); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -3314,6 +3314,42 @@ fincstp(); } +void MacroAssembler::load_float(Address src) { + if (UseSSE >= 1) { + movflt(xmm0, src); + } else { + 
LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fld_s(src)); + } +} + +void MacroAssembler::store_float(Address dst) { + if (UseSSE >= 1) { + movflt(dst, xmm0); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fstp_s(dst)); + } +} + +void MacroAssembler::load_double(Address src) { + if (UseSSE >= 2) { + movdbl(xmm0, src); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fld_d(src)); + } +} + +void MacroAssembler::store_double(Address dst) { + if (UseSSE >= 2) { + movdbl(dst, xmm0); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(fstp_d(dst)); + } +} + void MacroAssembler::fremr(Register tmp) { save_rax(tmp); { Label L; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -471,6 +471,22 @@ // Pop ST (ffree & fincstp combined) void fpop(); + // Load float value from 'address'. If UseSSE >= 1, the value is loaded into + // register xmm0. Otherwise, the value is loaded onto the FPU stack. + void load_float(Address src); + + // Store float value to 'address'. If UseSSE >= 1, the value is stored + // from register xmm0. Otherwise, the value is stored from the FPU stack. + void store_float(Address dst); + + // Load double value from 'address'. If UseSSE >= 2, the value is loaded into + // register xmm0. Otherwise, the value is loaded onto the FPU stack. + void load_double(Address src); + + // Store double value to 'address'. If UseSSE >= 2, the value is stored + // from register xmm0. Otherwise, the value is stored from the FPU stack. + void store_double(Address dst); + // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -170,22 +170,12 @@ __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled"); } - // In SSE mode, interpreter returns FP results in xmm0 but they need - // to end up back on the FPU so it can operate on them. - if (state == ftos && UseSSE >= 1) { - __ subptr(rsp, wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter"); } - __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 
1 : 0, "generate_return_entry_for in interpreter"); - // Restore stack bottom in case i2c adjusted stack __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); // and NULL it as marker that rsp is now tos until next java call @@ -217,21 +207,12 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { address entry = __ pc(); - // In SSE mode, FP results are in xmm0 - if (state == ftos && UseSSE > 0) { - __ subptr(rsp, wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, wordSize); - } else if (state == dtos && UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_deopt_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter"); } - __ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_deopt_entry_for in interpreter"); - // The stack is not extended by deopt but we must NULL last_sp as this // entry is like a "return". __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); @@ -735,7 +716,7 @@ if (UseCRC32Intrinsics) { address entry = __ pc(); - // rbx,: Method* + // rbx: Method* // rsi: senderSP must preserved for slow path, set SP to it on fast path // rdx: scratch // rdi: scratch @@ -841,6 +822,124 @@ return generate_native_entry(false); } +/** + * Method entry for static native method: + * java.lang.Float.intBitsToFloat(int bits) + */ +address InterpreterGenerator::generate_Float_intBitsToFloat_entry() { + address entry; + + if (UseSSE >= 1) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movflt(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + +/** + * Method entry for static native method: + * java.lang.Float.floatToRawIntBits(float value) + */ +address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() { + address entry; + + if (UseSSE >= 1) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + + +/** + * Method entry for static native method: + * java.lang.Double.longBitsToDouble(long bits) + */ +address InterpreterGenerator::generate_Double_longBitsToDouble_entry() { + address entry; + + if (UseSSE >= 2) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). 
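The generate_Float_intBitsToFloat_entry()/floatToRawIntBits entries above (and the Double variants that follow) only move a raw bit pattern between a stack slot and xmm0 or rax/rdx; no numeric conversion takes place, and the safepoint check is skipped just as in the compiled intrinsics. The same operation in portable terms (sketch; std::bit_cast needs C++20):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // intBitsToFloat: reinterpret the 32-bit pattern, never convert.
      float one = std::bit_cast<float>(0x3f800000u);
      // floatToRawIntBits: the reverse direction; NaN payloads are preserved.
      std::uint32_t neg_zero_bits = std::bit_cast<std::uint32_t>(-0.0f);
      std::printf("%f 0x%08x\n", one, neg_zero_bits);  // 1.000000 0x80000000
    }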
+ + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movdbl(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + +/** + * Method entry for static native method: + * java.lang.Double.doubleToRawLongBits(double value) + */ +address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { + address entry; + + if (UseSSE >= 2) { + entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rdx, Address(rsp, 2*wordSize)); + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + } else { + entry = generate_native_entry(false); + } + + return entry; +} + // // Interpreter stub for calling a native method. (asm interpreter) // This sets up a somewhat different looking stack for calling the native method @@ -1090,7 +1189,7 @@ double_handler.addr()); __ jcc(Assembler::notEqual, L); __ bind(push_double); - __ push(dtos); + __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0). __ bind(L); } __ push(ltos); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1707,10 +1707,10 @@ address& vep) { assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; - aep = __ pc(); __ push_ptr(); __ jmp(L); - fep = __ pc(); __ push_f(); __ jmp(L); - dep = __ pc(); __ push_d(); __ jmp(L); - lep = __ pc(); __ push_l(); __ jmp(L); + aep = __ pc(); __ push_ptr(); __ jmp(L); + fep = __ pc(); __ push_f(xmm0); __ jmp(L); + dep = __ pc(); __ push_d(xmm0); __ jmp(L); + lep = __ pc(); __ push_l(); __ jmp(L); bep = cep = sep = iep = __ pc(); __ push_i(); vep = __ pc(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/cpu/x86/vm/templateTable_x86.cpp --- a/hotspot/src/cpu/x86/vm/templateTable_x86.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/templateTable_x86.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -349,53 +349,60 @@ void TemplateTable::fconst(int value) { transition(vtos, ftos); + if (UseSSE >= 1) { + static float one = 1.0f, two = 2.0f; + switch (value) { + case 0: + __ xorps(xmm0, xmm0); + break; + case 1: + __ movflt(xmm0, ExternalAddress((address) &one)); + break; + case 2: + __ movflt(xmm0, ExternalAddress((address) &two)); + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - static float one = 1.0f, two = 2.0f; - switch (value) { - case 0: - __ xorps(xmm0, xmm0); - break; - case 1: - __ movflt(xmm0, ExternalAddress((address) &one)); - break; - case 2: - __ movflt(xmm0, ExternalAddress((address) &two)); - break; - default: ShouldNotReachHere(); - break; +#else + if (value == 0) { __ fldz(); + } else if (value == 1) { __ fld1(); + } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here + } else { ShouldNotReachHere(); + } +#endif // _LP64 } -#else - if (value == 0) { __ fldz(); - } else if (value == 1) { __ fld1(); - } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a 
better solution here - } else { ShouldNotReachHere(); - } -#endif } void TemplateTable::dconst(int value) { transition(vtos, dtos); + if (UseSSE >= 2) { + static double one = 1.0; + switch (value) { + case 0: + __ xorpd(xmm0, xmm0); + break; + case 1: + __ movdbl(xmm0, ExternalAddress((address) &one)); + break; + default: + ShouldNotReachHere(); + break; + } + } else { #ifdef _LP64 - static double one = 1.0; - switch (value) { - case 0: - __ xorpd(xmm0, xmm0); - break; - case 1: - __ movdbl(xmm0, ExternalAddress((address) &one)); - break; - default: ShouldNotReachHere(); - break; +#else + if (value == 0) { __ fldz(); + } else if (value == 1) { __ fld1(); + } else { ShouldNotReachHere(); + } +#endif } - -#else - if (value == 0) { __ fldz(); - } else if (value == 1) { __ fld1(); - } else { ShouldNotReachHere(); - } -#endif } void TemplateTable::bipush() { @@ -454,8 +461,7 @@ __ jccb(Assembler::notEqual, notFloat); // ftos - LP64_ONLY(__ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset))); - NOT_LP64(__ fld_s( Address(rcx, rbx, Address::times_ptr, base_offset))); + __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset)); __ push(ftos); __ jmp(Done); @@ -522,8 +528,7 @@ __ jccb(Assembler::notEqual, Long); // dtos - LP64_ONLY(__ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset))); - NOT_LP64(__ fld_d( Address(rcx, rbx, Address::times_ptr, base_offset))); + __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset)); __ push(dtos); __ jmpb(Done); @@ -617,15 +622,13 @@ void TemplateTable::fload() { transition(vtos, ftos); locals_index(rbx); - LP64_ONLY(__ movflt(xmm0, faddress(rbx))); - NOT_LP64(__ fld_s(faddress(rbx))); + __ load_float(faddress(rbx)); } void TemplateTable::dload() { transition(vtos, dtos); locals_index(rbx); - LP64_ONLY(__ movdbl(xmm0, daddress(rbx))); - NOT_LP64(__ fld_d(daddress(rbx))); + __ load_double(daddress(rbx)); } void TemplateTable::aload() { @@ -657,15 +660,13 @@ void TemplateTable::wide_fload() { transition(vtos, ftos); locals_index_wide(rbx); - LP64_ONLY(__ movflt(xmm0, faddress(rbx))); - NOT_LP64(__ fld_s(faddress(rbx))); + __ load_float(faddress(rbx)); } void TemplateTable::wide_dload() { transition(vtos, dtos); locals_index_wide(rbx); - LP64_ONLY(__ movdbl(xmm0, daddress(rbx))); - NOT_LP64(__ fld_d(daddress(rbx))); + __ load_double(daddress(rbx)); } void TemplateTable::wide_aload() { @@ -726,10 +727,9 @@ // rax: index // rdx: array index_check(rdx, rax); // kills rbx - LP64_ONLY(__ movflt(xmm0, Address(rdx, rax, - Address::times_4, - arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); - NOT_LP64(__ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); + __ load_float(Address(rdx, rax, + Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_FLOAT))); } void TemplateTable::daload() { @@ -737,10 +737,9 @@ // rax: index // rdx: array index_check(rdx, rax); // kills rbx - LP64_ONLY(__ movdbl(xmm0, Address(rdx, rax, - Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); - NOT_LP64(__ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); + __ load_double(Address(rdx, rax, + Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); } void TemplateTable::aaload() { @@ -807,14 +806,12 @@ void TemplateTable::fload(int n) { transition(vtos, ftos); - LP64_ONLY(__ movflt(xmm0, faddress(n))); - NOT_LP64(__ fld_s(faddress(n))); + __ load_float(faddress(n)); } void TemplateTable::dload(int n) { transition(vtos, dtos); - LP64_ONLY(__ 
movdbl(xmm0, daddress(n))); - NOT_LP64(__ fld_d(daddress(n))); + __ load_double(daddress(n)); } void TemplateTable::aload(int n) { @@ -919,15 +916,13 @@ void TemplateTable::fstore() { transition(ftos, vtos); locals_index(rbx); - LP64_ONLY(__ movflt(faddress(rbx), xmm0)); - NOT_LP64(__ fstp_s(faddress(rbx))); + __ store_float(faddress(rbx)); } void TemplateTable::dstore() { transition(dtos, vtos); locals_index(rbx); - LP64_ONLY(__ movdbl(daddress(rbx), xmm0)); - NOT_LP64(__ fstp_d(daddress(rbx))); + __ store_double(daddress(rbx)); } void TemplateTable::astore() { @@ -956,7 +951,7 @@ void TemplateTable::wide_fstore() { #ifdef _LP64 transition(vtos, vtos); - __ pop_f(); + __ pop_f(xmm0); locals_index_wide(rbx); __ movflt(faddress(rbx), xmm0); #else @@ -967,7 +962,7 @@ void TemplateTable::wide_dstore() { #ifdef _LP64 transition(vtos, vtos); - __ pop_d(); + __ pop_d(xmm0); locals_index_wide(rbx); __ movdbl(daddress(rbx), xmm0); #else @@ -1011,29 +1006,21 @@ void TemplateTable::fastore() { transition(ftos, vtos); __ pop_i(rbx); - // xmm0: value + // value is in UseSSE >= 1 ? xmm0 : ST(0) // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx - LP64_ONLY(__ movflt(Address(rdx, rbx, - Address::times_4, - arrayOopDesc::base_offset_in_bytes(T_FLOAT)), - xmm0)); - NOT_LP64(__ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)))); + __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))); } void TemplateTable::dastore() { transition(dtos, vtos); __ pop_i(rbx); - // xmm0: value + // value is in UseSSE >= 2 ? xmm0 : ST(0) // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx - LP64_ONLY(__ movdbl(Address(rdx, rbx, - Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), - xmm0)); - NOT_LP64(__ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)))); + __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); } void TemplateTable::aastore() { @@ -1134,14 +1121,12 @@ void TemplateTable::fstore(int n) { transition(ftos, vtos); - LP64_ONLY(__ movflt(faddress(n), xmm0)); - NOT_LP64(__ fstp_s(faddress(n))); + __ store_float(faddress(n)); } void TemplateTable::dstore(int n) { transition(dtos, vtos); - LP64_ONLY(__ movdbl(daddress(n), xmm0)); - NOT_LP64(__ fstp_d(daddress(n))); + __ store_double(daddress(n)); } @@ -1425,82 +1410,127 @@ void TemplateTable::fop2(Operation op) { transition(ftos, ftos); + + if (UseSSE >= 1) { + switch (op) { + case add: + __ addss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case sub: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ subss(xmm0, xmm1); + break; + case mul: + __ mulss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case div: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ divss(xmm0, xmm1); + break; + case rem: + // On x86_64 platforms the SharedRuntime::frem method is called to perform the + // modulo operation. The frem method calls the function + // double fmod(double x, double y) in math.h. The documentation of fmod states: + // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN + // (signalling or quiet) is returned. + // + // On x86_32 platforms the FPU is used to perform the modulo operation. The + // reason is that on 32-bit Windows the sign of modulo operations diverges from + // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f). 
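The rem comments above are the reason x86_32 keeps the FPU path: a conforming fmod() gives the result the sign of the dividend, matching Java's % semantics, whereas the 32-bit Windows runtime referred to returns +0.0f for -0.0f % -3.14f. A quick standalone check of the standard behaviour (sketch; what a particular libm prints is exactly the point in question):

    #include <cmath>
    #include <cstdio>

    int main() {
      double r = std::fmod(-0.0, -3.14);
      // C99/C11 Annex F: fmod(+/-0, y) returns +/-0, so signbit should be 1 here.
      std::printf("fmod(-0.0, -3.14) = %g, signbit = %d\n", r, (int)std::signbit(r));
      // NaN operands propagate, but the standard leaves open whether the result
      // is a signalling or a quiet NaN -- the ambiguity the comment mentions.
      std::printf("fmod(NaN, 2.0) is NaN: %d\n", (int)std::isnan(std::fmod(NAN, 2.0)));
    }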
+ // The fprem instruction used on x86_32 is functionally equivalent to + // SharedRuntime::frem in that it returns a NaN. #ifdef _LP64 - switch (op) { - case add: - __ addss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case sub: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ subss(xmm0, xmm1); - break; - case mul: - __ mulss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case div: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ divss(xmm0, xmm1); - break; - case rem: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); - break; - default: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); +#else + __ push_f(xmm0); + __ pop_f(); + __ fld_s(at_rsp()); + __ fremr(rax); + __ f2ieee(); + __ pop(rax); // pop second operand off the stack + __ push_f(); + __ pop_f(xmm0); +#endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { +#ifdef _LP64 ShouldNotReachHere(); - break; - } #else - switch (op) { + switch (op) { case add: __ fadd_s (at_rsp()); break; case sub: __ fsubr_s(at_rsp()); break; case mul: __ fmul_s (at_rsp()); break; case div: __ fdivr_s(at_rsp()); break; case rem: __ fld_s (at_rsp()); __ fremr(rax); break; default : ShouldNotReachHere(); + } + __ f2ieee(); + __ pop(rax); // pop second operand off the stack +#endif // _LP64 } - __ f2ieee(); - __ pop(rax); // pop float thing off -#endif } void TemplateTable::dop2(Operation op) { transition(dtos, dtos); + if (UseSSE >= 2) { + switch (op) { + case add: + __ addsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case sub: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ subsd(xmm0, xmm1); + break; + case mul: + __ mulsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case div: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ divsd(xmm0, xmm1); + break; + case rem: + // Similar to fop2(), the modulo operation is performed using the + // SharedRuntime::drem method (on x86_64 platforms) or using the + // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2(). #ifdef _LP64 - switch (op) { - case add: - __ addsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case sub: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ subsd(xmm0, xmm1); - break; - case mul: - __ mulsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case div: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ divsd(xmm0, xmm1); - break; - case rem: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); - break; - default: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); +#else + __ push_d(xmm0); + __ pop_d(); + __ fld_d(at_rsp()); + __ fremr(rax); + __ d2ieee(); + __ pop(rax); + __ pop(rdx); + __ push_d(); + __ pop_d(xmm0); +#endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { +#ifdef _LP64 ShouldNotReachHere(); - break; - } #else - switch (op) { + switch (op) { case add: __ fadd_d (at_rsp()); break; case sub: __ fsubr_d(at_rsp()); break; case mul: { @@ -1543,12 +1573,13 @@ } case rem: __ fld_d (at_rsp()); __ fremr(rax); break; default : ShouldNotReachHere(); + } + __ d2ieee(); + // Pop double precision number from rsp. 
+ __ pop(rax); + __ pop(rdx); +#endif } - __ d2ieee(); - // Pop double precision number from rsp. - __ pop(rax); - __ pop(rdx); -#endif } void TemplateTable::ineg() { @@ -1562,7 +1593,6 @@ NOT_LP64(__ lneg(rdx, rax)); } -#ifdef _LP64 // Note: 'double' and 'long long' have 32-bits alignment on x86. static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { // Use the expression (adr)&(~0xF) to provide 128-bits aligned address @@ -1577,26 +1607,30 @@ // Buffer for 128-bits masks used by SSE instructions. static jlong float_signflip_pool[2*2]; static jlong double_signflip_pool[2*2]; -#endif void TemplateTable::fneg() { transition(ftos, ftos); -#ifdef _LP64 - static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000); - __ xorps(xmm0, ExternalAddress((address) float_signflip)); -#else - __ fchs(); -#endif + if (UseSSE >= 1) { + static jlong *float_signflip = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000); + __ xorps(xmm0, ExternalAddress((address) float_signflip)); + } else { + LP64_ONLY(ShouldNotReachHere()); + NOT_LP64(__ fchs()); + } } void TemplateTable::dneg() { transition(dtos, dtos); + if (UseSSE >= 2) { + static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000); + __ xorpd(xmm0, ExternalAddress((address) double_signflip)); + } else { #ifdef _LP64 - static jlong *double_signflip = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000); - __ xorpd(xmm0, ExternalAddress((address) double_signflip)); + ShouldNotReachHere(); #else - __ fchs(); + __ fchs(); #endif + } } void TemplateTable::iinc() { @@ -1798,18 +1832,26 @@ __ extend_sign(rdx, rax); break; case Bytecodes::_i2f: - __ push(rax); // store int on tos - __ fild_s(at_rsp()); // load int to ST0 - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp + if (UseSSE >= 1) { + __ cvtsi2ssl(xmm0, rax); + } else { + __ push(rax); // store int on tos + __ fild_s(at_rsp()); // load int to ST0 + __ f2ieee(); // truncate to float size + __ pop(rcx); // adjust rsp + } break; case Bytecodes::_i2d: + if (UseSSE >= 2) { + __ cvtsi2sdl(xmm0, rax); + } else { __ push(rax); // add one slot for d2ieee() __ push(rax); // store int on tos __ fild_s(at_rsp()); // load int to ST0 __ d2ieee(); // truncate to double size __ pop(rcx); // adjust rsp __ pop(rcx); + } break; case Bytecodes::_i2b: __ shll(rax, 24); // truncate upper 24 bits @@ -1829,50 +1871,102 @@ /* nothing to do */ break; case Bytecodes::_l2f: + // On 64-bit platforms, the cvtsi2ssq instruction is used to convert + // 64-bit long values to floats. On 32-bit platforms it is not possible + // to use that instruction with 64-bit operands, therefore the FPU is + // used to perform the conversion. __ push(rdx); // store long on tos __ push(rax); __ fild_d(at_rsp()); // load long to ST0 __ f2ieee(); // truncate to float size __ pop(rcx); // adjust rsp __ pop(rcx); + if (UseSSE >= 1) { + __ push_f(); + __ pop_f(xmm0); + } break; case Bytecodes::_l2d: + // On 32-bit platforms the FPU is used for conversion because on + // 32-bit platforms it is not not possible to use the cvtsi2sdq + // instruction with 64-bit operands. 
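The _l2f/_l2d comments above rest on an ISA restriction: the scalar conversions from a 64-bit integer (cvtsi2ss/cvtsi2sd with a quadword source) exist only in 64-bit mode, so 32-bit builds convert on the x87 stack and then shuttle the result into xmm0. The corresponding compiler intrinsics make the split visible (sketch; build with SSE2 enabled):

    #include <xmmintrin.h>   // SSE:  _mm_cvtsi32_ss
    #include <emmintrin.h>   // SSE2: _mm_cvtsi32_sd
    #include <cstdio>

    int main() {
      // i2f / i2d from a 32-bit integer work in both 32- and 64-bit mode.
      __m128  f = _mm_cvtsi32_ss(_mm_setzero_ps(), 42);
      __m128d d = _mm_cvtsi32_sd(_mm_setzero_pd(), 42);
    #if defined(__x86_64__) || defined(_M_X64)
      // l2f / l2d from a 64-bit integer: these forms exist only in 64-bit mode,
      // which is why the 32-bit interpreter falls back to fild_d on the FPU.
      f = _mm_cvtsi64_ss(_mm_setzero_ps(), 1LL << 40);
      d = _mm_cvtsi64_sd(_mm_setzero_pd(), 1LL << 40);
    #endif
      std::printf("%f %f\n", _mm_cvtss_f32(f), _mm_cvtsd_f64(d));
    }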
__ push(rdx); // store long on tos __ push(rax); __ fild_d(at_rsp()); // load long to ST0 __ d2ieee(); // truncate to double size __ pop(rcx); // adjust rsp __ pop(rcx); + if (UseSSE >= 2) { + __ push_d(); + __ pop_d(xmm0); + } break; case Bytecodes::_f2i: - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack + // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs + // as it returns 0 for any NaN. + if (UseSSE >= 1) { + __ push_f(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ fstp_s(at_rsp()); // pass float argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); break; case Bytecodes::_f2l: - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack + // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs + // as it returns 0 for any NaN. + if (UseSSE >= 1) { + __ push_f(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ fstp_s(at_rsp()); // pass float argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); break; case Bytecodes::_f2d: - /* nothing to do */ + if (UseSSE < 1) { + /* nothing to do */ + } else if (UseSSE == 1) { + __ push_f(xmm0); + __ pop_f(); + } else { // UseSSE >= 2 + __ cvtss2sd(xmm0, xmm0); + } break; case Bytecodes::_d2i: - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack + if (UseSSE >= 2) { + __ push_d(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ push(rcx); + __ fstp_d(at_rsp()); // pass double argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2); break; case Bytecodes::_d2l: - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack + if (UseSSE >= 2) { + __ push_d(xmm0); + } else { + __ push(rcx); // reserve space for argument + __ push(rcx); + __ fstp_d(at_rsp()); // pass double argument on stack + } __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2); break; case Bytecodes::_d2f: - __ push(rcx); // reserve space for f2ieee() - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp + if (UseSSE <= 1) { + __ push(rcx); // reserve space for f2ieee() + __ f2ieee(); // truncate to float size + __ pop(rcx); // adjust rsp + if (UseSSE == 1) { + // The cvtsd2ss instruction is not available if UseSSE==1, therefore + // the conversion is performed using the FPU in this case. + __ push_f(); + __ pop_f(xmm0); + } + } else { // UseSSE >= 2 + __ cvtsd2ss(xmm0, xmm0); + } break; default : ShouldNotReachHere(); @@ -1901,42 +1995,47 @@ } void TemplateTable::float_cmp(bool is_float, int unordered_result) { -#ifdef _LP64 - Label done; - if (is_float) { - // XXX get rid of pop here, use ... reg, mem32 - __ pop_f(xmm1); - __ ucomiss(xmm1, xmm0); - } else { - // XXX get rid of pop here, use ... reg, mem64 - __ pop_d(xmm1); - __ ucomisd(xmm1, xmm0); - } - if (unordered_result < 0) { - __ movl(rax, -1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::below, done); - __ setb(Assembler::notEqual, rdx); - __ movzbl(rax, rdx); + if ((is_float && UseSSE >= 1) || + (!is_float && UseSSE >= 2)) { + Label done; + if (is_float) { + // XXX get rid of pop here, use ... reg, mem32 + __ pop_f(xmm1); + __ ucomiss(xmm1, xmm0); + } else { + // XXX get rid of pop here, use ... 
reg, mem64 + __ pop_d(xmm1); + __ ucomisd(xmm1, xmm0); + } + if (unordered_result < 0) { + __ movl(rax, -1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::below, done); + __ setb(Assembler::notEqual, rdx); + __ movzbl(rax, rdx); + } else { + __ movl(rax, 1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::above, done); + __ movl(rax, 0); + __ jccb(Assembler::equal, done); + __ decrementl(rax); + } + __ bind(done); } else { - __ movl(rax, 1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::above, done); - __ movl(rax, 0); - __ jccb(Assembler::equal, done); - __ decrementl(rax); - } - __ bind(done); +#ifdef _LP64 + ShouldNotReachHere(); #else - if (is_float) { - __ fld_s(at_rsp()); - } else { - __ fld_d(at_rsp()); - __ pop(rdx); + if (is_float) { + __ fld_s(at_rsp()); + } else { + __ fld_d(at_rsp()); + __ pop(rdx); + } + __ pop(rcx); + __ fcmp2int(rax, unordered_result < 0); +#endif // _LP64 } - __ pop(rcx); - __ fcmp2int(rax, unordered_result < 0); -#endif } void TemplateTable::branch(bool is_jsr, bool is_wide) { @@ -2014,6 +2113,7 @@ __ pop(rcx); __ pop(rdx); __ movptr(rax, Address(rcx, Method::method_counters_offset())); + __ testptr(rax, rax); __ jcc(Assembler::zero, dispatch); __ bind(has_counters); @@ -2747,8 +2847,7 @@ __ jcc(Assembler::notEqual, notFloat); // ftos - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); __ push(ftos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) { @@ -2762,8 +2861,7 @@ __ jcc(Assembler::notEqual, notDouble); #endif // dtos - LP64_ONLY(__ movdbl(xmm0, field)); - NOT_LP64(__ fld_d(field)); + __ load_double(field); __ push(dtos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) { @@ -3045,8 +3143,7 @@ { __ pop(ftos); if (!is_static) pop_and_check_object(obj); - NOT_LP64( __ fstp_s(field);) - LP64_ONLY( __ movflt(field, xmm0);) + __ store_float(field); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no); } @@ -3063,8 +3160,7 @@ { __ pop(dtos); if (!is_static) pop_and_check_object(obj); - NOT_LP64( __ fstp_d(field);) - LP64_ONLY( __ movdbl(field, xmm0);) + __ store_double(field); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no); } @@ -3122,8 +3218,8 @@ case Bytecodes::_fast_sputfield: // fall through case Bytecodes::_fast_cputfield: // fall through case Bytecodes::_fast_iputfield: __ push_i(rax); break; - case Bytecodes::_fast_dputfield: __ push_d(); break; - case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_dputfield: __ push(dtos); break; + case Bytecodes::_fast_fputfield: __ push(ftos); break; case Bytecodes::_fast_lputfield: __ push_l(rax); break; default: @@ -3146,8 +3242,8 @@ case Bytecodes::_fast_sputfield: // fall through case Bytecodes::_fast_cputfield: // fall through case Bytecodes::_fast_iputfield: __ pop_i(rax); break; - case Bytecodes::_fast_dputfield: __ pop_d(); break; - case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_dputfield: __ pop(dtos); break; + case Bytecodes::_fast_fputfield: __ pop(ftos); break; case Bytecodes::_fast_lputfield: __ pop_l(rax); break; } __ bind(L2); @@ -3211,12 +3307,10 @@ __ movw(field, rax); break; case Bytecodes::_fast_fputfield: - NOT_LP64( __ fstp_s(field); ) - LP64_ONLY( __ movflt(field, xmm0);) + __ store_float(field); break; case Bytecodes::_fast_dputfield: - NOT_LP64( __ fstp_d(field); ) - LP64_ONLY( __ movdbl(field, xmm0);) + __ 
store_double(field); break; default: ShouldNotReachHere(); @@ -3301,12 +3395,10 @@ __ load_unsigned_short(rax, field); break; case Bytecodes::_fast_fgetfield: - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); break; case Bytecodes::_fast_dgetfield: - LP64_ONLY(__ movdbl(xmm0, field)); - NOT_LP64(__ fld_d(field)); + __ load_double(field); break; default: ShouldNotReachHere(); @@ -3346,8 +3438,7 @@ __ verify_oop(rax); break; case ftos: - LP64_ONLY(__ movflt(xmm0, field)); - NOT_LP64(__ fld_s(field)); + __ load_float(field); break; default: ShouldNotReachHere(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/os/aix/vm/perfMemory_aix.cpp --- a/hotspot/src/os/aix/vm/perfMemory_aix.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/os/aix/vm/perfMemory_aix.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright 2012, 2013 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -454,13 +454,27 @@ *saved_cwd_fd = result; } - // Set the current directory to dirname by using the fd of the directory. + // Set the current directory to dirname by using the fd of the directory and + // handle errors, otherwise shared memory files will be created in cwd. result = fchdir(fd); - - return dirp; + if (result == OS_ERR) { + if (PrintMiscellaneous && Verbose) { + warning("could not change to directory %s", dirname); + } + if (*saved_cwd_fd != -1) { + ::close(*saved_cwd_fd); + *saved_cwd_fd = -1; + } + // Close the directory. + os::closedir(dirp); + return NULL; + } else { + return dirp; + } } // Close the directory and restore the current working directory. +// static void close_directory_secure_cwd(DIR* dirp, int saved_cwd_fd) { int result; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/os/bsd/vm/perfMemory_bsd.cpp --- a/hotspot/src/os/bsd/vm/perfMemory_bsd.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/os/bsd/vm/perfMemory_bsd.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -375,10 +375,23 @@ *saved_cwd_fd = result; } - // Set the current directory to dirname by using the fd of the directory. + // Set the current directory to dirname by using the fd of the directory and + // handle errors, otherwise shared memory files will be created in cwd. result = fchdir(fd); - - return dirp; + if (result == OS_ERR) { + if (PrintMiscellaneous && Verbose) { + warning("could not change to directory %s", dirname); + } + if (*saved_cwd_fd != -1) { + ::close(*saved_cwd_fd); + *saved_cwd_fd = -1; + } + // Close the directory. + os::closedir(dirp); + return NULL; + } else { + return dirp; + } } // Close the directory and restore the current working directory. 
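The perfMemory changes above (repeated for each POSIX port) stop ignoring a failed fchdir(): if the call fails but the code falls through, the shared-memory file ends up being created in whatever the old working directory was. Reduced to a standalone sketch with illustrative names that do not match the VM's helpers:

    #include <cstdio>
    #include <dirent.h>
    #include <fcntl.h>
    #include <unistd.h>

    // Open 'dirname', remember the old cwd in *saved_cwd_fd, and chdir into it.
    // On any failure clean up and return NULL so the caller cannot continue in
    // the wrong directory.
    DIR* open_directory_and_enter(const char* dirname, int* saved_cwd_fd) {
      DIR* dirp = opendir(dirname);
      if (dirp == nullptr) return nullptr;

      *saved_cwd_fd = open(".", O_RDONLY);   // lets the caller fchdir() back later

      if (fchdir(dirfd(dirp)) == -1) {       // handle the error instead of ignoring it
        std::fprintf(stderr, "could not change to directory %s\n", dirname);
        if (*saved_cwd_fd != -1) {
          close(*saved_cwd_fd);
          *saved_cwd_fd = -1;
        }
        closedir(dirp);
        return nullptr;
      }
      return dirp;
    }

    int main() {
      int saved = -1;
      DIR* d = open_directory_and_enter("/tmp", &saved);
      std::printf("entered: %d\n", d != nullptr);
      if (d != nullptr) {
        closedir(d);
        if (saved != -1) { fchdir(saved); close(saved); }
      }
    }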
diff -r eb1661ea942c -r 6675700073c1 hotspot/src/os/linux/vm/os_linux.cpp --- a/hotspot/src/os/linux/vm/os_linux.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/os/linux/vm/os_linux.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -5785,9 +5785,11 @@ status = pthread_mutex_unlock(_mutex); assert(status == 0, "invariant"); } else { + // must capture correct index before unlocking + int index = _cur_index; status = pthread_mutex_unlock(_mutex); assert(status == 0, "invariant"); - status = pthread_cond_signal(&_cond[_cur_index]); + status = pthread_cond_signal(&_cond[index]); assert(status == 0, "invariant"); } } else { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/os/linux/vm/perfMemory_linux.cpp --- a/hotspot/src/os/linux/vm/perfMemory_linux.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/os/linux/vm/perfMemory_linux.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -374,10 +374,23 @@ *saved_cwd_fd = result; } - // Set the current directory to dirname by using the fd of the directory. + // Set the current directory to dirname by using the fd of the directory and + // handle errors, otherwise shared memory files will be created in cwd. result = fchdir(fd); - - return dirp; + if (result == OS_ERR) { + if (PrintMiscellaneous && Verbose) { + warning("could not change to directory %s", dirname); + } + if (*saved_cwd_fd != -1) { + ::close(*saved_cwd_fd); + *saved_cwd_fd = -1; + } + // Close the directory. + os::closedir(dirp); + return NULL; + } else { + return dirp; + } } // Close the directory and restore the current working directory. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/os/solaris/vm/perfMemory_solaris.cpp --- a/hotspot/src/os/solaris/vm/perfMemory_solaris.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/os/solaris/vm/perfMemory_solaris.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -377,10 +377,23 @@ *saved_cwd_fd = result; } - // Set the current directory to dirname by using the fd of the directory. + // Set the current directory to dirname by using the fd of the directory and + // handle errors, otherwise shared memory files will be created in cwd. result = fchdir(fd); - - return dirp; + if (result == OS_ERR) { + if (PrintMiscellaneous && Verbose) { + warning("could not change to directory %s", dirname); + } + if (*saved_cwd_fd != -1) { + ::close(*saved_cwd_fd); + *saved_cwd_fd = -1; + } + // Close the directory. + os::closedir(dirp); + return NULL; + } else { + return dirp; + } } // Close the directory and restore the current working directory. 
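Illustrative aside (not part of the changeset): the os_linux.cpp hunk above captures _cur_index into a local while the mutex is still held because, once pthread_mutex_unlock() returns, the parked thread can wake, reset _cur_index to -1 or re-park on the other condition variable; a late read could then signal the wrong condvar or index out of range. A reduced model of the unpark path, in which the field names match the hunk but the surrounding type and logic are simplified:

#include <pthread.h>

struct ParkerModel {
  pthread_mutex_t _mutex     = PTHREAD_MUTEX_INITIALIZER;
  pthread_cond_t  _cond[2]   = { PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER };
  volatile int    _cur_index = -1;   // which condvar the parked thread waits on, -1 if none

  void unpark() {
    pthread_mutex_lock(&_mutex);
    if (_cur_index != -1) {
      int index = _cur_index;          // must capture correct index before unlocking
      pthread_mutex_unlock(&_mutex);
      pthread_cond_signal(&_cond[index]);
    } else {
      pthread_mutex_unlock(&_mutex);   // nobody is parked; nothing to signal
    }
  }
};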
diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/classfile/systemDictionary.cpp --- a/hotspot/src/share/vm/classfile/systemDictionary.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/classfile/systemDictionary.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -2680,187 +2680,3 @@ #endif // INCLUDE_TRACE } -#ifndef PRODUCT - -// statistics code -class ClassStatistics: AllStatic { - private: - static int nclasses; // number of classes - static int nmethods; // number of methods - static int nmethoddata; // number of methodData - static int class_size; // size of class objects in words - static int method_size; // size of method objects in words - static int debug_size; // size of debug info in methods - static int methoddata_size; // size of methodData objects in words - - static void do_class(Klass* k) { - nclasses++; - class_size += k->size(); - if (k->oop_is_instance()) { - InstanceKlass* ik = (InstanceKlass*)k; - class_size += ik->methods()->size(); - class_size += ik->constants()->size(); - class_size += ik->local_interfaces()->size(); - class_size += ik->transitive_interfaces()->size(); - // We do not have to count implementors, since we only store one! - // SSS: How should these be accounted now that they have moved? - // class_size += ik->fields()->length(); - } - } - - static void do_method(Method* m) { - nmethods++; - method_size += m->size(); - // class loader uses same objArray for empty vectors, so don't count these - if (m->has_stackmap_table()) { - method_size += m->stackmap_data()->size(); - } - - MethodData* mdo = m->method_data(); - if (mdo != NULL) { - nmethoddata++; - methoddata_size += mdo->size(); - } - } - - public: - static void print() { - SystemDictionary::classes_do(do_class); - SystemDictionary::methods_do(do_method); - tty->print_cr("Class statistics:"); - tty->print_cr("%d classes (%d bytes)", nclasses, class_size * oopSize); - tty->print_cr("%d methods (%d bytes = %d base + %d debug info)", nmethods, - (method_size + debug_size) * oopSize, method_size * oopSize, debug_size * oopSize); - tty->print_cr("%d methoddata (%d bytes)", nmethoddata, methoddata_size * oopSize); - } -}; - - -int ClassStatistics::nclasses = 0; -int ClassStatistics::nmethods = 0; -int ClassStatistics::nmethoddata = 0; -int ClassStatistics::class_size = 0; -int ClassStatistics::method_size = 0; -int ClassStatistics::debug_size = 0; -int ClassStatistics::methoddata_size = 0; - -void SystemDictionary::print_class_statistics() { - ResourceMark rm; - ClassStatistics::print(); -} - - -class MethodStatistics: AllStatic { - public: - enum { - max_parameter_size = 10 - }; - private: - - static int _number_of_methods; - static int _number_of_final_methods; - static int _number_of_static_methods; - static int _number_of_native_methods; - static int _number_of_synchronized_methods; - static int _number_of_profiled_methods; - static int _number_of_bytecodes; - static int _parameter_size_profile[max_parameter_size]; - static int _bytecodes_profile[Bytecodes::number_of_java_codes]; - - static void initialize() { - _number_of_methods = 0; - _number_of_final_methods = 0; - _number_of_static_methods = 0; - _number_of_native_methods = 0; - _number_of_synchronized_methods = 0; - _number_of_profiled_methods = 0; - _number_of_bytecodes = 0; - for (int i = 0; i < max_parameter_size ; i++) _parameter_size_profile[i] = 0; - for (int j = 0; j < Bytecodes::number_of_java_codes; j++) _bytecodes_profile [j] = 0; - }; - - static void do_method(Method* m) { - _number_of_methods++; - // collect flag info - if 
(m->is_final() ) _number_of_final_methods++; - if (m->is_static() ) _number_of_static_methods++; - if (m->is_native() ) _number_of_native_methods++; - if (m->is_synchronized()) _number_of_synchronized_methods++; - if (m->method_data() != NULL) _number_of_profiled_methods++; - // collect parameter size info (add one for receiver, if any) - _parameter_size_profile[MIN2(m->size_of_parameters() + (m->is_static() ? 0 : 1), max_parameter_size - 1)]++; - // collect bytecodes info - { - Thread *thread = Thread::current(); - HandleMark hm(thread); - BytecodeStream s(methodHandle(thread, m)); - Bytecodes::Code c; - while ((c = s.next()) >= 0) { - _number_of_bytecodes++; - _bytecodes_profile[c]++; - } - } - } - - public: - static void print() { - initialize(); - SystemDictionary::methods_do(do_method); - // generate output - tty->cr(); - tty->print_cr("Method statistics (static):"); - // flag distribution - tty->cr(); - tty->print_cr("%6d final methods %6.1f%%", _number_of_final_methods , _number_of_final_methods * 100.0F / _number_of_methods); - tty->print_cr("%6d static methods %6.1f%%", _number_of_static_methods , _number_of_static_methods * 100.0F / _number_of_methods); - tty->print_cr("%6d native methods %6.1f%%", _number_of_native_methods , _number_of_native_methods * 100.0F / _number_of_methods); - tty->print_cr("%6d synchronized methods %6.1f%%", _number_of_synchronized_methods, _number_of_synchronized_methods * 100.0F / _number_of_methods); - tty->print_cr("%6d profiled methods %6.1f%%", _number_of_profiled_methods, _number_of_profiled_methods * 100.0F / _number_of_methods); - // parameter size profile - tty->cr(); - { int tot = 0; - int avg = 0; - for (int i = 0; i < max_parameter_size; i++) { - int n = _parameter_size_profile[i]; - tot += n; - avg += n*i; - tty->print_cr("parameter size = %1d: %6d methods %5.1f%%", i, n, n * 100.0F / _number_of_methods); - } - assert(tot == _number_of_methods, "should be the same"); - tty->print_cr(" %6d methods 100.0%%", _number_of_methods); - tty->print_cr("(average parameter size = %3.1f including receiver, if any)", (float)avg / _number_of_methods); - } - // bytecodes profile - tty->cr(); - { int tot = 0; - for (int i = 0; i < Bytecodes::number_of_java_codes; i++) { - if (Bytecodes::is_defined(i)) { - Bytecodes::Code c = Bytecodes::cast(i); - int n = _bytecodes_profile[c]; - tot += n; - tty->print_cr("%9d %7.3f%% %s", n, n * 100.0F / _number_of_bytecodes, Bytecodes::name(c)); - } - } - assert(tot == _number_of_bytecodes, "should be the same"); - tty->print_cr("%9d 100.000%%", _number_of_bytecodes); - } - tty->cr(); - } -}; - -int MethodStatistics::_number_of_methods; -int MethodStatistics::_number_of_final_methods; -int MethodStatistics::_number_of_static_methods; -int MethodStatistics::_number_of_native_methods; -int MethodStatistics::_number_of_synchronized_methods; -int MethodStatistics::_number_of_profiled_methods; -int MethodStatistics::_number_of_bytecodes; -int MethodStatistics::_parameter_size_profile[MethodStatistics::max_parameter_size]; -int MethodStatistics::_bytecodes_profile[Bytecodes::number_of_java_codes]; - - -void SystemDictionary::print_method_statistics() { - MethodStatistics::print(); -} - -#endif // PRODUCT diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/classfile/systemDictionary.hpp --- a/hotspot/src/share/vm/classfile/systemDictionary.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/classfile/systemDictionary.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -366,8 +366,6 @@ // Printing static void print(bool details 
= true); static void print_shared(bool details = true); - static void print_class_statistics() PRODUCT_RETURN; - static void print_method_statistics() PRODUCT_RETURN; // Number of contained klasses // This is both fully loaded classes and classes in the process diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/code/codeCache.cpp --- a/hotspot/src/share/vm/code/codeCache.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/code/codeCache.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -746,14 +746,17 @@ void CodeCache::gc_epilogue() { assert_locked_or_safepoint(CodeCache_lock); NMethodIterator iter; - while(iter.next_alive()) { + while(iter.next()) { nmethod* nm = iter.method(); - assert(!nm->is_unloaded(), "Tautology"); - if (needs_cache_clean()) { - nm->cleanup_inline_caches(); + if (!nm->is_zombie()) { + if (needs_cache_clean()) { + // Clean ICs of unloaded nmethods as well because they may reference other + // unloaded nmethods that may be flushed earlier in the sweeper cycle. + nm->cleanup_inline_caches(); + } + DEBUG_ONLY(nm->verify()); + DEBUG_ONLY(nm->verify_oop_relocations()); } - DEBUG_ONLY(nm->verify()); - DEBUG_ONLY(nm->verify_oop_relocations()); } set_needs_cache_clean(false); prune_scavenge_root_nmethods(); @@ -993,29 +996,6 @@ return number_of_marked_CodeBlobs; } -void CodeCache::make_marked_nmethods_zombies() { - assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); - NMethodIterator iter; - while(iter.next_alive()) { - nmethod* nm = iter.method(); - if (nm->is_marked_for_deoptimization()) { - - // If the nmethod has already been made non-entrant and it can be converted - // then zombie it now. Otherwise make it non-entrant and it will eventually - // be zombied when it is no longer seen on the stack. Note that the nmethod - // might be "entrant" and not on the stack and so could be zombied immediately - // but we can't tell because we don't track it on stack until it becomes - // non-entrant. 
- - if (nm->is_not_entrant() && nm->can_not_entrant_be_converted()) { - nm->make_zombie(); - } else { - nm->make_not_entrant(); - } - } - } -} - void CodeCache::make_marked_nmethods_not_entrant() { assert_locked_or_safepoint(CodeCache_lock); NMethodIterator iter; @@ -1072,7 +1052,7 @@ // Deoptimize all activations depending on marked nmethods Deoptimization::deoptimize_dependents(); - // Make the dependent methods not entrant (in VM_Deoptimize they are made zombies) + // Make the dependent methods not entrant make_marked_nmethods_not_entrant(); } } @@ -1102,7 +1082,7 @@ // Deoptimize all activations depending on marked nmethods Deoptimization::deoptimize_dependents(); - // Make the dependent methods not entrant (in VM_Deoptimize they are made zombies) + // Make the dependent methods not entrant make_marked_nmethods_not_entrant(); } } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/code/codeCache.hpp --- a/hotspot/src/share/vm/code/codeCache.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/code/codeCache.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -225,7 +225,6 @@ public: static void mark_all_nmethods_for_deoptimization(); static int mark_for_deoptimization(Method* dependee); - static void make_marked_nmethods_zombies(); static void make_marked_nmethods_not_entrant(); // Flushing and deoptimization diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/code/compiledIC.cpp --- a/hotspot/src/share/vm/code/compiledIC.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/code/compiledIC.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -343,8 +343,8 @@ // Kill any leftover stub we might have too clear_ic_stub(); if (is_optimized()) { - set_ic_destination(entry); - } else { + set_ic_destination(entry); + } else { set_ic_destination_and_value(entry, (void*)NULL); } } else { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/code/compiledIC.hpp --- a/hotspot/src/share/vm/code/compiledIC.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/code/compiledIC.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -214,7 +214,7 @@ // // They all takes a TRAP argument, since they can cause a GC if the inline-cache buffer is full. // - void set_to_clean(); // Can only be called during a safepoint operation + void set_to_clean(); void set_to_monomorphic(CompiledICInfo& info); void clear_ic_stub(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/code/nmethod.cpp --- a/hotspot/src/share/vm/code/nmethod.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/code/nmethod.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1021,7 +1021,6 @@ void nmethod::cleanup_inline_caches() { - assert_locked_or_safepoint(CompiledIC_lock); // If the method is not entrant or zombie then a JMP is plastered over the @@ -1037,7 +1036,8 @@ // In fact, why are we bothering to look at oops in a non-entrant method?? } - // Find all calls in an nmethod, and clear the ones that points to zombie methods + // Find all calls in an nmethod and clear the ones that point to non-entrant, + // zombie and unloaded nmethods. 
ResourceMark rm; RelocIterator iter(this, low_boundary); while(iter.next()) { @@ -1049,7 +1049,7 @@ CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination()); if( cb != NULL && cb->is_nmethod() ) { nmethod* nm = (nmethod*)cb; - // Clean inline caches pointing to both zombie and not_entrant methods + // Clean inline caches pointing to zombie, non-entrant and unloaded methods if (!nm->is_in_use() || (nm->method()->code() != nm)) ic->set_to_clean(); } break; @@ -1059,7 +1059,7 @@ CodeBlob *cb = CodeCache::find_blob_unsafe(csc->destination()); if( cb != NULL && cb->is_nmethod() ) { nmethod* nm = (nmethod*)cb; - // Clean inline caches pointing to both zombie and not_entrant methods + // Clean inline caches pointing to zombie, non-entrant and unloaded methods if (!nm->is_in_use() || (nm->method()->code() != nm)) csc->set_to_clean(); } break; @@ -2529,7 +2529,7 @@ // Hmm. OSR methods can be deopted but not marked as zombie or not_entrant // seems odd. - if( is_zombie() || is_not_entrant() ) + if (is_zombie() || is_not_entrant() || is_unloaded()) return; // Make sure all the entry points are correctly aligned for patching. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/compiler/compileBroker.cpp --- a/hotspot/src/share/vm/compiler/compileBroker.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/compiler/compileBroker.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1399,6 +1399,28 @@ // do the compilation if (method->is_native()) { if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) { + // The following native methods: + // + // java.lang.Float.intBitsToFloat + // java.lang.Float.floatToRawIntBits + // java.lang.Double.longBitsToDouble + // java.lang.Double.doubleToRawLongBits + // + // are called through the interpreter even if interpreter native stubs + // are not preferred (i.e., calling through adapter handlers is preferred). + // The reason is that on x86_32 signaling NaNs (sNaNs) are not preserved + // if the version of the methods from the native libraries is called. + // As the interpreter and the C2-intrinsified version of the methods preserves + // sNaNs, that would result in an inconsistent way of handling of sNaNs. + if ((UseSSE >= 1 && + (method->intrinsic_id() == vmIntrinsics::_intBitsToFloat || + method->intrinsic_id() == vmIntrinsics::_floatToRawIntBits)) || + (UseSSE >= 2 && + (method->intrinsic_id() == vmIntrinsics::_longBitsToDouble || + method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) { + return NULL; + } + // To properly handle the appendix argument for out-of-line calls we are using a small trampoline that // pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime). 
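Illustrative aside (not part of the changeset): the compileBroker.cpp comment above hinges on the fact that moving a float value through the x87 FPU stack converts a signaling NaN into a quiet NaN, while SSE moves and the interpreter/C2 intrinsics keep the bit pattern intact. A small stand-alone C++ demonstration, assuming a 32-bit build whose float loads and stores go through x87 (it is not VM code):

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  const uint32_t snan_bits = 0x7f800001u;   // a signaling NaN bit pattern
  float f;
  std::memcpy(&f, &snan_bits, sizeof f);    // reinterpret the bits as a float
  volatile float g = f;                     // with x87 codegen this round trip goes
  float h = g;                              // through the FPU stack and may quiet the NaN
  uint32_t out;
  std::memcpy(&out, &h, sizeof out);
  // On an x87 path 'out' can come back as 0x7fc00001 (quiet NaN); on SSE it stays
  // 0x7f800001. That is why the hunk above keeps intBitsToFloat/floatToRawIntBits
  // (and the double variants) on the interpreter/intrinsic path instead of letting
  // the native library versions run on x86_32.
  std::printf("in=0x%08x out=0x%08x\n", snan_bits, out);
  return 0;
}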
// diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -620,7 +620,7 @@ // Support for parallelizing survivor space rescan if ((CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) || CMSParallelInitialMarkEnabled) { const size_t max_plab_samples = - _young_gen->max_survivor_size() / (ThreadLocalAllocBuffer::min_size() * HeapWordSize); + _young_gen->max_survivor_size() / (PLAB::min_size() * HeapWordSize); _survivor_plab_array = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads, mtGC); _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples, mtGC); @@ -3005,7 +3005,7 @@ COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;) if (CMSParallelInitialMarkEnabled) { // The parallel version. - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); uint n_workers = workers->active_workers(); @@ -4488,7 +4488,7 @@ // workers to be taken from the active workers in the work gang. CMSParRemarkTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, - uint n_workers, FlexibleWorkGang* workers, + uint n_workers, WorkGang* workers, OopTaskQueueSet* task_queues, StrongRootsScope* strong_roots_scope): CMSParMarkTask("Rescan roots and grey objects in parallel", @@ -5061,7 +5061,7 @@ // Parallel version of remark void CMSCollector::do_remark_parallel() { GenCollectedHeap* gch = GenCollectedHeap::heap(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); // Choose to use the number of GC workers most recently set // into "active_workers". @@ -5236,6 +5236,16 @@ //////////////////////////////////////////////////////// // Parallel Reference Processing Task Proxy Class //////////////////////////////////////////////////////// +class AbstractGangTaskWOopQueues : public AbstractGangTask { + OopTaskQueueSet* _queues; + ParallelTaskTerminator _terminator; + public: + AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) : + AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {} + ParallelTaskTerminator* terminator() { return &_terminator; } + OopTaskQueueSet* queues() { return _queues; } +}; + class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; CMSCollector* _collector; @@ -5372,7 +5382,7 @@ void CMSRefProcTaskExecutor::execute(ProcessTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); CMSRefProcTaskProxy rp_task(task, &_collector, _collector.ref_processor()->span(), @@ -5385,7 +5395,7 @@ { GenCollectedHeap* gch = GenCollectedHeap::heap(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); CMSRefEnqueueTaskProxy enq_task(task); workers->run_task(&enq_task); @@ -5419,7 +5429,7 @@ // balance_all_queues() and balance_queues()). 
GenCollectedHeap* gch = GenCollectedHeap::heap(); uint active_workers = ParallelGCThreads; - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); if (workers != NULL) { active_workers = workers->active_workers(); // The expectation is that active_workers will have already diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/parNewGeneration.cpp --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -25,7 +25,7 @@ #include "precompiled.hpp" #include "gc/cms/compactibleFreeListSpace.hpp" #include "gc/cms/concurrentMarkSweepGeneration.hpp" -#include "gc/cms/parNewGeneration.hpp" +#include "gc/cms/parNewGeneration.inline.hpp" #include "gc/cms/parOopClosures.inline.hpp" #include "gc/serial/defNewGeneration.inline.hpp" #include "gc/shared/adaptiveSizePolicy.hpp" @@ -248,8 +248,7 @@ } } if (buf_space != NULL) { - plab->set_word_size(buf_size); - plab->set_buf(buf_space); + plab->set_buf(buf_space, buf_size); record_survivor_plab(buf_space, buf_size); obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes); // Note that we cannot compare buf_size < word_sz below @@ -803,7 +802,7 @@ void ParNewRefProcTaskExecutor::execute(ProcessTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); _state_set.reset(workers->active_workers(), _young_gen.promotion_failed()); ParNewRefProcTaskProxy rp_task(task, _young_gen, _old_gen, @@ -816,7 +815,7 @@ void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); ParNewRefEnqueueTaskProxy enq_task(task); workers->run_task(&enq_task); @@ -890,7 +889,7 @@ _gc_timer->register_gc_start(); AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy(); - FlexibleWorkGang* workers = gch->workers(); + WorkGang* workers = gch->workers(); assert(workers != NULL, "Need workgang for parallel work"); uint active_workers = AdaptiveSizePolicy::calc_active_workers(workers->total_workers(), diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/parNewGeneration.hpp --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -169,11 +169,7 @@ // Allocate a to-space block of size "sz", or else return NULL. HeapWord* alloc_in_to_space_slow(size_t word_sz); - HeapWord* alloc_in_to_space(size_t word_sz) { - HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes); - if (obj != NULL) return obj; - else return alloc_in_to_space_slow(word_sz); - } + inline HeapWord* alloc_in_to_space(size_t word_sz); HeapWord* young_old_boundary() { return _young_old_boundary; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/parNewGeneration.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP +#define SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP + +#include "gc/cms/parNewGeneration.hpp" +#include "gc/shared/plab.inline.hpp" +#include "utilities/globalDefinitions.hpp" + +inline HeapWord* ParScanThreadState::alloc_in_to_space(size_t word_sz) { + HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes); + if (obj != NULL) return obj; + else return alloc_in_to_space_slow(word_sz); +} +#endif // SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/yieldingWorkgroup.cpp --- a/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -26,20 +26,45 @@ #include "gc/cms/yieldingWorkgroup.hpp" #include "utilities/macros.hpp" -// Forward declaration of classes declared here. - -class GangWorker; -class WorkData; +YieldingFlexibleGangWorker::YieldingFlexibleGangWorker(YieldingFlexibleWorkGang* gang, int id) + : AbstractGangWorker(gang, id) {} YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( - const char* name, uint workers, bool are_GC_task_threads) : - FlexibleWorkGang(name, workers, are_GC_task_threads, false), - _yielded_workers(0) {} + const char* name, uint workers, bool are_GC_task_threads) : + AbstractWorkGang(name, workers, are_GC_task_threads, false), + _yielded_workers(0), + _started_workers(0), + _finished_workers(0), + _sequence_number(0), + _task(NULL) { + + // Other initialization. 
+ _monitor = new Monitor(/* priority */ Mutex::leaf, + /* name */ "WorkGroup monitor", + /* allow_vm_block */ are_GC_task_threads, + Monitor::_safepoint_check_sometimes); + + assert(monitor() != NULL, "Failed to allocate monitor"); +} -GangWorker* YieldingFlexibleWorkGang::allocate_worker(uint which) { - YieldingFlexibleGangWorker* new_member = - new YieldingFlexibleGangWorker(this, which); - return (YieldingFlexibleGangWorker*) new_member; +AbstractGangWorker* YieldingFlexibleWorkGang::allocate_worker(uint which) { + return new YieldingFlexibleGangWorker(this, which); +} + +void YieldingFlexibleWorkGang::internal_worker_poll(YieldingWorkData* data) const { + assert(data != NULL, "worker data is null"); + data->set_task(task()); + data->set_sequence_number(sequence_number()); +} + +void YieldingFlexibleWorkGang::internal_note_start() { + assert(monitor()->owned_by_self(), "note_finish is an internal method"); + _started_workers += 1; +} + +void YieldingFlexibleWorkGang::internal_note_finish() { + assert(monitor()->owned_by_self(), "note_finish is an internal method"); + _finished_workers += 1; } // Run a task; returns when the task is done, or the workers yield, @@ -292,37 +317,37 @@ /////////////////////////////// void YieldingFlexibleGangWorker::loop() { int previous_sequence_number = 0; - Monitor* gang_monitor = gang()->monitor(); + Monitor* gang_monitor = yf_gang()->monitor(); MutexLockerEx ml(gang_monitor, Mutex::_no_safepoint_check_flag); - WorkData data; + YieldingWorkData data; int id; while (true) { // Check if there is work to do. - gang()->internal_worker_poll(&data); + yf_gang()->internal_worker_poll(&data); if (data.task() != NULL && data.sequence_number() != previous_sequence_number) { // There is work to be done. // First check if we need to become active or if there // are already the requisite number of workers - if (gang()->started_workers() == yf_gang()->active_workers()) { + if (yf_gang()->started_workers() == yf_gang()->active_workers()) { // There are already enough workers, we do not need to // to run; fall through and wait on monitor. } else { // We need to pitch in and do the work. - assert(gang()->started_workers() < yf_gang()->active_workers(), + assert(yf_gang()->started_workers() < yf_gang()->active_workers(), "Unexpected state"); - id = gang()->started_workers(); - gang()->internal_note_start(); + id = yf_gang()->started_workers(); + yf_gang()->internal_note_start(); // Now, release the gang mutex and do the work. 
{ MutexUnlockerEx mul(gang_monitor, Mutex::_no_safepoint_check_flag); data.task()->work(id); // This might include yielding } // Reacquire monitor and note completion of this worker - gang()->internal_note_finish(); + yf_gang()->internal_note_finish(); // Update status of task based on whether all workers have // finished or some have yielded - assert(data.task() == gang()->task(), "Confused task binding"); - if (gang()->finished_workers() == yf_gang()->active_workers()) { + assert(data.task() == yf_gang()->task(), "Confused task binding"); + if (yf_gang()->finished_workers() == yf_gang()->active_workers()) { switch (data.yf_task()->status()) { case ABORTING: { data.yf_task()->set_status(ABORTED); @@ -338,7 +363,7 @@ } gang_monitor->notify_all(); // Notify overseer } else { // at least one worker is still working or yielded - assert(gang()->finished_workers() < yf_gang()->active_workers(), + assert(yf_gang()->finished_workers() < yf_gang()->active_workers(), "Counts inconsistent"); switch (data.yf_task()->status()) { case ACTIVE: { @@ -347,7 +372,7 @@ break; } case YIELDING: { - if (gang()->finished_workers() + yf_gang()->yielded_workers() + if (yf_gang()->finished_workers() + yf_gang()->yielded_workers() == yf_gang()->active_workers()) { data.yf_task()->set_status(YIELDED); gang_monitor->notify_all(); // notify overseer diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp --- a/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -29,6 +29,7 @@ #include "utilities/macros.hpp" // Forward declarations +class YieldingFlexibleGangTask; class YieldingFlexibleWorkGang; // Status of tasks @@ -43,13 +44,32 @@ COMPLETED }; +class YieldingWorkData: public StackObj { + // This would be a struct, but I want accessor methods. +private: + AbstractGangTask* _task; + int _sequence_number; +public: + // Constructor and destructor + YieldingWorkData() : _task(NULL), _sequence_number(0) {} + ~YieldingWorkData() {} + + // Accessors and modifiers + AbstractGangTask* task() const { return _task; } + void set_task(AbstractGangTask* value) { _task = value; } + int sequence_number() const { return _sequence_number; } + void set_sequence_number(int value) { _sequence_number = value; } + + YieldingFlexibleGangTask* yf_task() const { + return (YieldingFlexibleGangTask*)_task; + } +}; + // Class YieldingFlexibleGangWorker: // Several instances of this class run in parallel as workers for a gang. -class YieldingFlexibleGangWorker: public GangWorker { +class YieldingFlexibleGangWorker: public AbstractGangWorker { public: - // Ctor - YieldingFlexibleGangWorker(AbstractWorkGang* gang, int id) : - GangWorker(gang, id) { } + YieldingFlexibleGangWorker(YieldingFlexibleWorkGang* gang, int id); public: YieldingFlexibleWorkGang* yf_gang() const @@ -108,9 +128,6 @@ friend class YieldingFlexibleWorkGang; friend class YieldingFlexibleGangWorker; - NOT_PRODUCT(virtual bool is_YieldingFlexibleGang_task() const { - return true; - }) void set_status(Status s) { _status = s; @@ -160,7 +177,7 @@ // YieldingGangWorkers, and provides infrastructure // supporting yielding to the "GangOverseer", // being the thread that orchestrates the WorkGang via run_task(). -class YieldingFlexibleWorkGang: public FlexibleWorkGang { +class YieldingFlexibleWorkGang: public AbstractWorkGang { // Here's the public interface to this class. public: // Constructor and destructor. 
@@ -168,12 +185,10 @@ bool are_GC_task_threads); YieldingFlexibleGangTask* yielding_task() const { - assert(task() == NULL || task()->is_YieldingFlexibleGang_task(), - "Incorrect cast"); - return (YieldingFlexibleGangTask*)task(); + return task(); } // Allocate a worker and return a pointer to it. - GangWorker* allocate_worker(uint which); + AbstractGangWorker* allocate_worker(uint which); // Run a task; returns when the task is done, or the workers yield, // or the task is aborted. @@ -216,6 +231,42 @@ private: friend class YieldingFlexibleGangWorker; void reset(); // NYI + + + // The monitor which protects these data, + // and notifies of changes in it. + Monitor* _monitor; + // Accessors for fields + Monitor* monitor() const { + return _monitor; + } + + // The number of started workers. + uint _started_workers; + // The number of finished workers. + uint _finished_workers; + + uint started_workers() const { + return _started_workers; + } + uint finished_workers() const { + return _finished_workers; + } + + // A sequence number for the current task. + int _sequence_number; + int sequence_number() const { + return _sequence_number; + } + + YieldingFlexibleGangTask* _task; + YieldingFlexibleGangTask* task() const { + return _task; + } + + void internal_worker_poll(YieldingWorkData* data) const; + void internal_note_start(); + void internal_note_finish(); }; #endif // SHARE_VM_GC_CMS_YIELDINGWORKGROUP_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/concurrentMark.cpp --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -629,7 +629,7 @@ gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); #endif - _parallel_workers = new FlexibleWorkGang("G1 Marker", + _parallel_workers = new WorkGang("G1 Marker", _max_parallel_marking_threads, false, true); if (_parallel_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); @@ -3088,29 +3088,6 @@ } #endif -template -inline void CMTask::process_grey_object(oop obj) { - assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); - assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); - - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT, - _worker_id, p2i((void*) obj)); - } - - size_t obj_size = obj->size(); - _words_scanned += obj_size; - - if (scan) { - obj->oop_iterate(_cm_oop_closure); - } - statsOnly( ++_objs_scanned ); - check_limits(); -} - -template void CMTask::process_grey_object(oop); -template void CMTask::process_grey_object(oop); - // Closure for iteration over bitmaps class CMBitMapClosure : public BitMapClosure { private: diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/concurrentMark.hpp --- a/hotspot/src/share/vm/gc/g1/concurrentMark.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -451,7 +451,7 @@ double* _accum_task_vtime; // Accumulated task vtime - FlexibleWorkGang* _parallel_workers; + WorkGang* _parallel_workers; ForceOverflowSettings _force_overflow_conc; ForceOverflowSettings _force_overflow_stw; @@ -1126,7 +1126,7 @@ inline void deal_with_reference(oop obj); // It scans an object and visits its children. - void scan_object(oop obj) { process_grey_object(obj); } + inline void scan_object(oop obj); // It pushes an object on the local queue. 
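Illustrative aside (not part of the changeset): the concurrentMark hunks here replace the out-of-line CMTask::process_grey_object definition (and its two explicit instantiations) with a definition in concurrentMark.inline.hpp, shown just below, mirroring the new parNewGeneration.inline.hpp earlier in the changeset. Template and inline member bodies have to be visible at every call site, which is what the *.inline.hpp split provides. A generic single-file sketch with made-up names:

#include <cstdio>

// --- would live in foo.hpp: declaration only ---
struct Foo {
  template <bool Verbose>
  void process(int x);
};

// --- would live in foo.inline.hpp and be included by every caller ---
template <bool Verbose>
inline void Foo::process(int x) {
  if (Verbose) {
    std::printf("processing %d\n", x);
  }
}

// Because call sites see the body, no explicit instantiations (like the two
// removed from concurrentMark.cpp above) are required.
int main() {
  Foo f;
  f.process<true>(42);
  f.process<false>(7);
  return 0;
}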
inline void push(oop obj); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/concurrentMark.inline.hpp --- a/hotspot/src/share/vm/gc/g1/concurrentMark.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -232,6 +232,9 @@ } } +// It scans an object and visits its children. +inline void CMTask::scan_object(oop obj) { process_grey_object(obj); } + inline void CMTask::push(oop obj) { HeapWord* objAddr = (HeapWord*) obj; assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); @@ -299,6 +302,28 @@ return objAddr < global_finger; } +template +inline void CMTask::process_grey_object(oop obj) { + assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); + assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); + + if (_cm->verbose_high()) { + gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT, + _worker_id, p2i((void*) obj)); + } + + size_t obj_size = obj->size(); + _words_scanned += obj_size; + + if (scan) { + obj->oop_iterate(_cm_oop_closure); + } + statsOnly( ++_objs_scanned ); + check_limits(); +} + + + inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) { if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1AllocRegion.cpp --- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -46,10 +46,11 @@ _dummy_region = dummy_region; } -void G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region, - bool bot_updates) { +size_t G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region, + bool bot_updates) { assert(alloc_region != NULL && alloc_region != _dummy_region, "pre-condition"); + size_t result = 0; // Other threads might still be trying to allocate using a CAS out // of the region we are trying to retire, as they can do so without @@ -73,6 +74,7 @@ // If the allocation was successful we should fill in the space. CollectedHeap::fill_with_object(dummy, free_word_size); alloc_region->set_pre_dummy_top(dummy); + result += free_word_size * HeapWordSize; break; } @@ -81,13 +83,18 @@ // allocation and they fill up the region. In that case, we can // just get out of the loop. 
} + result += alloc_region->free(); + assert(alloc_region->free() / HeapWordSize < min_word_size_to_fill, "post-condition"); + return result; } -void G1AllocRegion::retire(bool fill_up) { +size_t G1AllocRegion::retire(bool fill_up) { assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly")); + size_t result = 0; + trace("retiring"); HeapRegion* alloc_region = _alloc_region; if (alloc_region != _dummy_region) { @@ -98,7 +105,7 @@ ar_ext_msg(this, "the alloc region should never be empty")); if (fill_up) { - fill_up_remaining_space(alloc_region, _bot_updates); + result = fill_up_remaining_space(alloc_region, _bot_updates); } assert(alloc_region->used() >= _used_bytes_before, @@ -109,6 +116,8 @@ _alloc_region = _dummy_region; } trace("retired"); + + return result; } HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size, @@ -196,11 +205,11 @@ } #if G1_ALLOC_REGION_TRACING -void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) { +void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_word_size, size_t actual_word_size, HeapWord* result) { // All the calls to trace that set either just the size or the size // and the result are considered part of level 2 tracing and are // skipped during level 1 tracing. - if ((word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) { + if ((actual_word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) { const size_t buffer_length = 128; char hr_buffer[buffer_length]; char rest_buffer[buffer_length]; @@ -217,10 +226,10 @@ if (G1_ALLOC_REGION_TRACING > 1) { if (result != NULL) { - jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT " " PTR_FORMAT, - word_size, result); - } else if (word_size != 0) { - jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT, word_size); + jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT " actual " SIZE_FORMAT " " PTR_FORMAT, + min_word_size, desired_word_size, actual_word_size, result); + } else if (min_word_size != 0) { + jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT, min_word_size, desired_word_size); } else { jio_snprintf(rest_buffer, buffer_length, ""); } @@ -251,26 +260,25 @@ _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes); } -HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size, - bool force) { +HeapRegion* G1GCAllocRegion::allocate_new_region(size_t word_size, + bool force) { assert(!force, "not supported for GC alloc regions"); - return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young); + return _g1h->new_gc_alloc_region(word_size, count(), _purpose); } -void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region, - size_t allocated_bytes) { - _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young); +void G1GCAllocRegion::retire_region(HeapRegion* alloc_region, + size_t allocated_bytes) { + _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, _purpose); } -HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size, - bool force) { - assert(!force, "not supported for GC alloc regions"); - return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old); -} - -void OldGCAllocRegion::retire_region(HeapRegion* alloc_region, - size_t allocated_bytes) { - _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old); +size_t G1GCAllocRegion::retire(bool fill_up) { + HeapRegion* retired = get(); + size_t end_waste = 
G1AllocRegion::retire(fill_up); + // Do not count retirement of the dummy allocation region. + if (retired != NULL) { + _stats->add_region_end_waste(end_waste / HeapWordSize); + } + return end_waste; } HeapRegion* OldGCAllocRegion::release() { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1AllocRegion.hpp --- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -26,6 +26,8 @@ #define SHARE_VM_GC_G1_G1ALLOCREGION_HPP #include "gc/g1/heapRegion.hpp" +#include "gc/g1/g1EvacStats.hpp" +#include "gc/g1/g1InCSetState.hpp" class G1CollectedHeap; @@ -102,16 +104,22 @@ static inline HeapWord* par_allocate(HeapRegion* alloc_region, size_t word_size, bool bot_updates); + // Perform a MT-safe allocation out of the given region, with the given + // minimum and desired size. Returns the actual size allocated (between + // minimum and desired size) in actual_word_size if the allocation has been + // successful. + static inline HeapWord* par_allocate(HeapRegion* alloc_region, + size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, + bool bot_updates); // Ensure that the region passed as a parameter has been filled up // so that noone else can allocate out of it any more. - static void fill_up_remaining_space(HeapRegion* alloc_region, - bool bot_updates); - - // Retire the active allocating region. If fill_up is true then make - // sure that the region is full before we retire it so that noone - // else can allocate out of it. - void retire(bool fill_up); + // Returns the number of bytes that have been wasted by filled up + // the space. + static size_t fill_up_remaining_space(HeapRegion* alloc_region, + bool bot_updates); // After a region is allocated by alloc_new_region, this // method is used to set it as the active alloc_region @@ -126,6 +134,12 @@ void fill_in_ext_msg(ar_ext_msg* msg, const char* message); protected: + // Retire the active allocating region. If fill_up is true then make + // sure that the region is full before we retire it so that no one + // else can allocate out of it. + // Returns the number of bytes that have been filled up during retire. + virtual size_t retire(bool fill_up); + // For convenience as subclasses use it. static G1CollectedHeap* _g1h; @@ -154,7 +168,18 @@ // First-level allocation: Should be called without holding a // lock. It will try to allocate lock-free out of the active region, // or return NULL if it was unable to. - inline HeapWord* attempt_allocation(size_t word_size, bool bot_updates); + inline HeapWord* attempt_allocation(size_t word_size, + bool bot_updates); + // Perform an allocation out of the current allocation region, with the given + // minimum and desired size. Returns the actual size allocated (between + // minimum and desired size) in actual_word_size if the allocation has been + // successful. + // Should be called without holding a lock. It will try to allocate lock-free + // out of the active region, or return NULL if it was unable to. + inline HeapWord* attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, + bool bot_updates); // Second-level allocation: Should be called while holding a // lock. It will try to first allocate lock-free out of the active @@ -164,6 +189,14 @@ // it conform to its locking protocol. 
inline HeapWord* attempt_allocation_locked(size_t word_size, bool bot_updates); + // Same as attempt_allocation_locked(size_t, bool), but allowing specification + // of minimum word size of the block in min_word_size, and the maximum word + // size of the allocation in desired_word_size. The actual size of the block is + // returned in actual_word_size. + inline HeapWord* attempt_allocation_locked(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, + bool bot_updates); // Should be called to allocate a new region even if the max of this // type of regions has been reached. Should only be called if other @@ -186,9 +219,17 @@ virtual HeapRegion* release(); #if G1_ALLOC_REGION_TRACING - void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL); + void trace(const char* str, + size_t min_word_size = 0, + size_t desired_word_size = 0, + size_t actual_word_size = 0, + HeapWord* result = NULL); #else // G1_ALLOC_REGION_TRACING - void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL) { } + void trace(const char* str, + size_t min_word_size = 0, + size_t desired_word_size = 0, + size_t actual_word_size = 0, + HeapWord* result = NULL) { } #endif // G1_ALLOC_REGION_TRACING }; @@ -201,22 +242,33 @@ : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { } }; -class SurvivorGCAllocRegion : public G1AllocRegion { +// Common base class for allocation regions used during GC. +class G1GCAllocRegion : public G1AllocRegion { protected: + G1EvacStats* _stats; + InCSetState::in_cset_state_t _purpose; + virtual HeapRegion* allocate_new_region(size_t word_size, bool force); virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes); + + virtual size_t retire(bool fill_up); public: - SurvivorGCAllocRegion() - : G1AllocRegion("Survivor GC Alloc Region", false /* bot_updates */) { } + G1GCAllocRegion(const char* name, bool bot_updates, G1EvacStats* stats, InCSetState::in_cset_state_t purpose) + : G1AllocRegion(name, bot_updates), _stats(stats), _purpose(purpose) { + assert(stats != NULL, "Must pass non-NULL PLAB statistics"); + } }; -class OldGCAllocRegion : public G1AllocRegion { -protected: - virtual HeapRegion* allocate_new_region(size_t word_size, bool force); - virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes); +class SurvivorGCAllocRegion : public G1GCAllocRegion { public: - OldGCAllocRegion() - : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { } + SurvivorGCAllocRegion(G1EvacStats* stats) + : G1GCAllocRegion("Survivor GC Alloc Region", false /* bot_updates */, stats, InCSetState::Young) { } +}; + +class OldGCAllocRegion : public G1GCAllocRegion { +public: + OldGCAllocRegion(G1EvacStats* stats) + : G1GCAllocRegion("Old GC Alloc Region", true /* bot_updates */, stats, InCSetState::Old) { } // This specialization of release() makes sure that the last card that has // been allocated into has been completely filled by a dummy object. 
This diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1AllocRegion.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1AllocRegion.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1AllocRegion.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -40,52 +40,74 @@ } } +inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region, size_t word_size, bool bot_updates) { + size_t temp; + return par_allocate(alloc_region, word_size, word_size, &temp, bot_updates); +} + inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region, - size_t word_size, + size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, bool bot_updates) { assert(alloc_region != NULL, err_msg("pre-condition")); assert(!alloc_region->is_empty(), err_msg("pre-condition")); if (!bot_updates) { - return alloc_region->par_allocate_no_bot_updates(word_size); + return alloc_region->par_allocate_no_bot_updates(min_word_size, desired_word_size, actual_word_size); } else { - return alloc_region->par_allocate(word_size); + return alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size); } } -inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size, +inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size, bool bot_updates) { + size_t temp; + return attempt_allocation(word_size, word_size, &temp, bot_updates); +} + +inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, bool bot_updates) { assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition")); HeapRegion* alloc_region = _alloc_region; assert(alloc_region != NULL, ar_ext_msg(this, "not initialized properly")); - HeapWord* result = par_allocate(alloc_region, word_size, bot_updates); + HeapWord* result = par_allocate(alloc_region, min_word_size, desired_word_size, actual_word_size, bot_updates); if (result != NULL) { - trace("alloc", word_size, result); + trace("alloc", min_word_size, desired_word_size, *actual_word_size, result); return result; } - trace("alloc failed", word_size); + trace("alloc failed", min_word_size, desired_word_size); return NULL; } -inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size, +inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size, bool bot_updates) { + size_t temp; + return attempt_allocation_locked(word_size, word_size, &temp, bot_updates); +} + +inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, bool bot_updates) { // First we have to redo the allocation, assuming we're holding the // appropriate lock, in case another thread changed the region while // we were waiting to get the lock. 
- HeapWord* result = attempt_allocation(word_size, bot_updates); + HeapWord* result = attempt_allocation(min_word_size, desired_word_size, actual_word_size, bot_updates); if (result != NULL) { return result; } retire(true /* fill_up */); - result = new_alloc_region_and_allocate(word_size, false /* force */); + result = new_alloc_region_and_allocate(desired_word_size, false /* force */); if (result != NULL) { - trace("alloc locked (second attempt)", word_size, result); + *actual_word_size = desired_word_size; + trace("alloc locked (second attempt)", min_word_size, desired_word_size, *actual_word_size, result); return result; } - trace("alloc locked failed", word_size); + trace("alloc locked failed", min_word_size, desired_word_size); return NULL; } @@ -94,13 +116,13 @@ assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition")); assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly")); - trace("forcing alloc"); + trace("forcing alloc", word_size, word_size); HeapWord* result = new_alloc_region_and_allocate(word_size, true /* force */); if (result != NULL) { - trace("alloc forced", word_size, result); + trace("alloc forced", word_size, word_size, word_size, result); return result; } - trace("alloc forced failed", word_size); + trace("alloc forced failed", word_size, word_size); return NULL; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1Allocator.cpp --- a/hotspot/src/share/vm/gc/g1/g1Allocator.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1Allocator.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -24,12 +24,20 @@ #include "precompiled.hpp" #include "gc/g1/g1Allocator.inline.hpp" +#include "gc/g1/g1AllocRegion.inline.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1CollectorPolicy.hpp" #include "gc/g1/g1MarkSweep.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionSet.inline.hpp" +G1DefaultAllocator::G1DefaultAllocator(G1CollectedHeap* heap) : + G1Allocator(heap), + _retained_old_gc_alloc_region(NULL), + _survivor_gc_alloc_region(heap->alloc_buffer_stats(InCSetState::Young)), + _old_gc_alloc_region(heap->alloc_buffer_stats(InCSetState::Old)) { +} + void G1DefaultAllocator::init_mutator_alloc_region() { assert(_mutator_alloc_region.get() == NULL, "pre-condition"); _mutator_alloc_region.init(); @@ -79,6 +87,8 @@ void G1DefaultAllocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) { assert_at_safepoint(true /* should_be_vm_thread */); + G1Allocator::init_gc_alloc_regions(evacuation_info); + _survivor_gc_alloc_region.init(); _old_gc_alloc_region.init(); reuse_retained_old_region(evacuation_info, @@ -101,10 +111,8 @@ _retained_old_gc_alloc_region->record_retained_region(); } - if (ResizePLAB) { - _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz(); - _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz(); - } + _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz(); + _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz(); } void G1DefaultAllocator::abandon_gc_alloc_regions() { @@ -136,78 +144,159 @@ HeapWord* G1Allocator::par_allocate_during_gc(InCSetState dest, size_t word_size, AllocationContext_t context) { + size_t temp = 0; + HeapWord* result = par_allocate_during_gc(dest, word_size, word_size, &temp, context); + assert(result == NULL || temp == word_size, + err_msg("Requested " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT, + word_size, temp, p2i(result))); + return result; +} + +HeapWord* 
G1Allocator::par_allocate_during_gc(InCSetState dest, + size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, + AllocationContext_t context) { switch (dest.value()) { case InCSetState::Young: - return survivor_attempt_allocation(word_size, context); + return survivor_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context); case InCSetState::Old: - return old_attempt_allocation(word_size, context); + return old_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context); default: ShouldNotReachHere(); return NULL; // Keep some compilers happy } } -HeapWord* G1Allocator::survivor_attempt_allocation(size_t word_size, +bool G1Allocator::survivor_is_full(AllocationContext_t context) const { + return _survivor_is_full; +} + +bool G1Allocator::old_is_full(AllocationContext_t context) const { + return _old_is_full; +} + +void G1Allocator::set_survivor_full(AllocationContext_t context) { + _survivor_is_full = true; +} + +void G1Allocator::set_old_full(AllocationContext_t context) { + _old_is_full = true; +} + +HeapWord* G1Allocator::survivor_attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, AllocationContext_t context) { - assert(!_g1h->is_humongous(word_size), + assert(!_g1h->is_humongous(desired_word_size), "we should not be seeing humongous-size allocations in this path"); - HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(word_size, + HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(min_word_size, + desired_word_size, + actual_word_size, false /* bot_updates */); - if (result == NULL) { + if (result == NULL && !survivor_is_full(context)) { MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag); - result = survivor_gc_alloc_region(context)->attempt_allocation_locked(word_size, + result = survivor_gc_alloc_region(context)->attempt_allocation_locked(min_word_size, + desired_word_size, + actual_word_size, false /* bot_updates */); + if (result == NULL) { + set_survivor_full(context); + } } if (result != NULL) { - _g1h->dirty_young_block(result, word_size); + _g1h->dirty_young_block(result, *actual_word_size); } return result; } -HeapWord* G1Allocator::old_attempt_allocation(size_t word_size, +HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, AllocationContext_t context) { - assert(!_g1h->is_humongous(word_size), + assert(!_g1h->is_humongous(desired_word_size), "we should not be seeing humongous-size allocations in this path"); - HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(word_size, + HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(min_word_size, + desired_word_size, + actual_word_size, true /* bot_updates */); - if (result == NULL) { + if (result == NULL && !old_is_full(context)) { MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag); - result = old_gc_alloc_region(context)->attempt_allocation_locked(word_size, + result = old_gc_alloc_region(context)->attempt_allocation_locked(min_word_size, + desired_word_size, + actual_word_size, true /* bot_updates */); + if (result == NULL) { + set_old_full(context); + } } return result; } +void G1Allocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) { + _survivor_is_full = false; + _old_is_full = false; +} + G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) : _g1h(G1CollectedHeap::heap()), _allocator(allocator), 
_survivor_alignment_bytes(calc_survivor_alignment_bytes()) { + for (size_t i = 0; i < ARRAY_SIZE(_direct_allocated); i++) { + _direct_allocated[i] = 0; + } +} + +bool G1PLABAllocator::may_throw_away_buffer(size_t const allocation_word_sz, size_t const buffer_size) const { + return (allocation_word_sz * 100 < buffer_size * ParallelGCBufferWastePct); } HeapWord* G1PLABAllocator::allocate_direct_or_new_plab(InCSetState dest, size_t word_sz, - AllocationContext_t context) { - size_t gclab_word_size = _g1h->desired_plab_sz(dest); - if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { + AllocationContext_t context, + bool* plab_refill_failed) { + size_t plab_word_size = G1CollectedHeap::heap()->desired_plab_sz(dest); + size_t required_in_plab = PLAB::size_required_for_allocation(word_sz); + + // Only get a new PLAB if the allocation fits and it would not waste more than + // ParallelGCBufferWastePct in the existing buffer. + if ((required_in_plab <= plab_word_size) && + may_throw_away_buffer(required_in_plab, plab_word_size)) { + G1PLAB* alloc_buf = alloc_buffer(dest, context); alloc_buf->retire(); - HeapWord* buf = _allocator->par_allocate_during_gc(dest, gclab_word_size, context); - if (buf == NULL) { - return NULL; // Let caller handle allocation failure. + size_t actual_plab_size = 0; + HeapWord* buf = _allocator->par_allocate_during_gc(dest, + required_in_plab, + plab_word_size, + &actual_plab_size, + context); + + assert(buf == NULL || ((actual_plab_size >= required_in_plab) && (actual_plab_size <= plab_word_size)), + err_msg("Requested at minimum " SIZE_FORMAT ", desired " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT, + required_in_plab, plab_word_size, actual_plab_size, p2i(buf))); + + if (buf != NULL) { + alloc_buf->set_buf(buf, actual_plab_size); + + HeapWord* const obj = alloc_buf->allocate(word_sz); + assert(obj != NULL, err_msg("PLAB should have been big enough, tried to allocate " + SIZE_FORMAT " requiring " SIZE_FORMAT " PLAB size " SIZE_FORMAT, + word_sz, required_in_plab, plab_word_size)); + return obj; } // Otherwise. - alloc_buf->set_word_size(gclab_word_size); - alloc_buf->set_buf(buf); - - HeapWord* const obj = alloc_buf->allocate(word_sz); - assert(obj != NULL, "buffer was definitely big enough..."); - return obj; - } else { - return _allocator->par_allocate_during_gc(dest, word_sz, context); + *plab_refill_failed = true; } + // Try direct allocation. 
+ HeapWord* result = _allocator->par_allocate_during_gc(dest, word_sz, context); + if (result != NULL) { + _direct_allocated[dest.value()] += word_sz; + } + return result; } void G1PLABAllocator::undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) { @@ -225,11 +314,14 @@ _alloc_buffers[InCSetState::Old] = &_tenured_alloc_buffer; } -void G1DefaultPLABAllocator::retire_alloc_buffers() { +void G1DefaultPLABAllocator::flush_and_retire_stats() { for (uint state = 0; state < InCSetState::Num; state++) { G1PLAB* const buf = _alloc_buffers[state]; if (buf != NULL) { - buf->flush_and_retire_stats(_g1h->alloc_buffer_stats(state)); + G1EvacStats* stats = _g1h->alloc_buffer_stats(state); + buf->flush_and_retire_stats(stats); + stats->add_direct_allocated(_direct_allocated[state]); + _direct_allocated[state] = 0; } } } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1Allocator.hpp --- a/hotspot/src/share/vm/gc/g1/g1Allocator.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1Allocator.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -38,23 +38,36 @@ // Also keeps track of retained regions across GCs. class G1Allocator : public CHeapObj { friend class VMStructs; +private: + bool _survivor_is_full; + bool _old_is_full; protected: G1CollectedHeap* _g1h; virtual MutatorAllocRegion* mutator_alloc_region(AllocationContext_t context) = 0; + virtual bool survivor_is_full(AllocationContext_t context) const; + virtual bool old_is_full(AllocationContext_t context) const; + + virtual void set_survivor_full(AllocationContext_t context); + virtual void set_old_full(AllocationContext_t context); + // Accessors to the allocation regions. virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) = 0; virtual OldGCAllocRegion* old_gc_alloc_region(AllocationContext_t context) = 0; // Allocation attempt during GC for a survivor object / PLAB. - inline HeapWord* survivor_attempt_allocation(size_t word_size, + inline HeapWord* survivor_attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, AllocationContext_t context); // Allocation attempt during GC for an old object / PLAB. 
- inline HeapWord* old_attempt_allocation(size_t word_size, + inline HeapWord* old_attempt_allocation(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, AllocationContext_t context); public: - G1Allocator(G1CollectedHeap* heap) : _g1h(heap) { } + G1Allocator(G1CollectedHeap* heap) : _g1h(heap), _survivor_is_full(false), _old_is_full(false) { } virtual ~G1Allocator() { } static G1Allocator* create_allocator(G1CollectedHeap* g1h); @@ -66,7 +79,7 @@ virtual void init_mutator_alloc_region() = 0; virtual void release_mutator_alloc_region() = 0; - virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0; + virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info); virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0; virtual void abandon_gc_alloc_regions() = 0; @@ -93,6 +106,12 @@ size_t word_size, AllocationContext_t context); + HeapWord* par_allocate_during_gc(InCSetState dest, + size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size, + AllocationContext_t context); + virtual size_t used_in_alloc_regions() = 0; }; @@ -114,7 +133,7 @@ HeapRegion* _retained_old_gc_alloc_region; public: - G1DefaultAllocator(G1CollectedHeap* heap) : G1Allocator(heap), _retained_old_gc_alloc_region(NULL) { } + G1DefaultAllocator(G1CollectedHeap* heap); virtual void init_mutator_alloc_region(); virtual void release_mutator_alloc_region(); @@ -163,8 +182,12 @@ guarantee(_retired, "Allocation buffer has not been retired"); } - virtual void set_buf(HeapWord* buf) { - PLAB::set_buf(buf); + // The amount of space in words wasted within the PLAB including + // waste due to refills and alignment. + size_t wasted() const { return _wasted; } + + virtual void set_buf(HeapWord* buf, size_t word_size) { + PLAB::set_buf(buf, word_size); _retired = false; } @@ -198,7 +221,10 @@ // architectures have a special compare against zero instructions. const uint _survivor_alignment_bytes; - virtual void retire_alloc_buffers() = 0; + // Number of words allocated directly (not counting PLAB allocation). + size_t _direct_allocated[InCSetState::Num]; + + virtual void flush_and_retire_stats() = 0; virtual G1PLAB* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0; // Calculate the survivor space object alignment in bytes. Returns that or 0 if @@ -215,6 +241,11 @@ } } + HeapWord* allocate_new_plab(InCSetState dest, + size_t word_sz, + AllocationContext_t context); + + bool may_throw_away_buffer(size_t const allocation_word_sz, size_t const buffer_size) const; public: G1PLABAllocator(G1Allocator* allocator); virtual ~G1PLABAllocator() { } @@ -225,31 +256,28 @@ // Allocate word_sz words in dest, either directly into the regions or by // allocating a new PLAB. Returns the address of the allocated memory, NULL if - // not successful. + // not successful. Plab_refill_failed indicates whether an attempt to refill the + // PLAB failed or not. HeapWord* allocate_direct_or_new_plab(InCSetState dest, size_t word_sz, - AllocationContext_t context); + AllocationContext_t context, + bool* plab_refill_failed); // Allocate word_sz words in the PLAB of dest. Returns the address of the // allocated memory, NULL if not successful. 
- HeapWord* plab_allocate(InCSetState dest, - size_t word_sz, - AllocationContext_t context) { - G1PLAB* buffer = alloc_buffer(dest, context); - if (_survivor_alignment_bytes == 0 || !dest.is_young()) { - return buffer->allocate(word_sz); - } else { - return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes); - } - } + inline HeapWord* plab_allocate(InCSetState dest, + size_t word_sz, + AllocationContext_t context); - HeapWord* allocate(InCSetState dest, size_t word_sz, - AllocationContext_t context) { + HeapWord* allocate(InCSetState dest, + size_t word_sz, + AllocationContext_t context, + bool* refill_failed) { HeapWord* const obj = plab_allocate(dest, word_sz, context); if (obj != NULL) { return obj; } - return allocate_direct_or_new_plab(dest, word_sz, context); + return allocate_direct_or_new_plab(dest, word_sz, context, refill_failed); } void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context); @@ -273,7 +301,7 @@ return _alloc_buffers[dest.value()]; } - virtual void retire_alloc_buffers(); + virtual void flush_and_retire_stats(); virtual void waste(size_t& wasted, size_t& undo_wasted); }; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1Allocator.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1Allocator.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1Allocator.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -27,6 +27,7 @@ #include "gc/g1/g1Allocator.hpp" #include "gc/g1/g1AllocRegion.inline.hpp" +#include "gc/shared/plab.inline.hpp" HeapWord* G1Allocator::attempt_allocation(size_t word_size, AllocationContext_t context) { return mutator_alloc_region(context)->attempt_allocation(word_size, false /* bot_updates */); @@ -43,4 +44,15 @@ return mutator_alloc_region(context)->attempt_allocation_force(word_size, false /* bot_updates */); } +inline HeapWord* G1PLABAllocator::plab_allocate(InCSetState dest, + size_t word_sz, + AllocationContext_t context) { + G1PLAB* buffer = alloc_buffer(dest, context); + if (_survivor_alignment_bytes == 0 || !dest.is_young()) { + return buffer->allocate(word_sz); + } else { + return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes); + } +} + #endif // SHARE_VM_GC_G1_G1ALLOCATOR_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1Allocator_ext.cpp --- a/hotspot/src/share/vm/gc/g1/g1Allocator_ext.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1Allocator_ext.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "gc/g1/g1Allocator.hpp" +#include "gc/g1/g1Allocator.inline.hpp" #include "gc/g1/g1CollectedHeap.hpp" G1Allocator* G1Allocator::create_allocator(G1CollectedHeap* g1h) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -26,7 +26,7 @@ #define SHARE_VM_GC_G1_G1BLOCKOFFSETTABLE_INLINE_HPP #include "gc/g1/g1BlockOffsetTable.hpp" -#include "gc/g1/heapRegion.inline.hpp" +#include "gc/g1/heapRegion.hpp" #include "gc/shared/space.hpp" inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Thu Sep 03 16:14:02 2015 -0700 
@@ -1944,8 +1944,8 @@ _young_list(new YoungList(this)), _gc_time_stamp(0), _summary_bytes_used(0), - _survivor_plab_stats(YoungPLABSize, PLABWeight), - _old_plab_stats(OldPLABSize, PLABWeight), + _survivor_evac_stats(YoungPLABSize, PLABWeight), + _old_evac_stats(OldPLABSize, PLABWeight), _expand_heap_after_alloc_failure(true), _surviving_young_words(NULL), _old_marking_cycles_started(0), @@ -1960,7 +1960,7 @@ _gc_tracer_stw(new (ResourceObj::C_HEAP, mtGC) G1NewTracer()), _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()) { - _workers = new FlexibleWorkGang("GC Thread", ParallelGCThreads, + _workers = new WorkGang("GC Thread", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); _workers->initialize_workers(); @@ -3504,6 +3504,13 @@ return G1HeapSummary(heap_summary, used(), eden_used_bytes, eden_capacity_bytes, survivor_used_bytes); } +G1EvacSummary G1CollectedHeap::create_g1_evac_summary(G1EvacStats* stats) { + return G1EvacSummary(stats->allocated(), stats->wasted(), stats->undo_wasted(), + stats->unused(), stats->used(), stats->region_end_waste(), + stats->regions_filled(), stats->direct_allocated(), + stats->failure_used(), stats->failure_waste()); +} + void G1CollectedHeap::trace_heap(GCWhen::Type when, const GCTracer* gc_tracer) { const G1HeapSummary& heap_summary = create_g1_heap_summary(); gc_tracer->report_gc_heap_summary(when, heap_summary); @@ -3753,8 +3760,7 @@ cl.flush_rem_set_entries(); } -void -G1CollectedHeap::setup_surviving_young_words() { +void G1CollectedHeap::setup_surviving_young_words() { assert(_surviving_young_words == NULL, "pre-condition"); uint array_length = g1_policy()->young_cset_region_length(); _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC); @@ -3770,17 +3776,15 @@ #endif // !ASSERT } -void -G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { - MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); +void G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { + assert_at_safepoint(true); uint array_length = g1_policy()->young_cset_region_length(); for (uint i = 0; i < array_length; ++i) { _surviving_young_words[i] += surv_young_words[i]; } } -void -G1CollectedHeap::cleanup_surviving_young_words() { +void G1CollectedHeap::cleanup_surviving_young_words() { guarantee( _surviving_young_words != NULL, "pre-condition" ); FREE_C_HEAP_ARRAY(size_t, _surviving_young_words); _surviving_young_words = NULL; @@ -4375,6 +4379,13 @@ } class G1ParEvacuateFollowersClosure : public VoidClosure { +private: + double _start_term; + double _term_time; + size_t _term_attempts; + + void start_term_time() { _term_attempts++; _start_term = os::elapsedTime(); } + void end_term_time() { _term_time += os::elapsedTime() - _start_term; } protected: G1CollectedHeap* _g1h; G1ParScanThreadState* _par_scan_state; @@ -4391,19 +4402,23 @@ RefToScanQueueSet* queues, ParallelTaskTerminator* terminator) : _g1h(g1h), _par_scan_state(par_scan_state), - _queues(queues), _terminator(terminator) {} + _queues(queues), _terminator(terminator), + _start_term(0.0), _term_time(0.0), _term_attempts(0) {} void do_void(); + double term_time() const { return _term_time; } + size_t term_attempts() const { return _term_attempts; } + private: inline bool offer_termination(); }; bool G1ParEvacuateFollowersClosure::offer_termination() { G1ParScanThreadState* const pss = par_scan_state(); - pss->start_term_time(); + start_term_time(); const bool res = terminator()->offer_termination(); 
- pss->end_term_time(); + end_term_time(); return res; } @@ -4444,15 +4459,17 @@ class G1ParTask : public AbstractGangTask { protected: G1CollectedHeap* _g1h; - RefToScanQueueSet *_queues; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; G1RootProcessor* _root_processor; ParallelTaskTerminator _terminator; uint _n_workers; public: - G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers) + G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers) : AbstractGangTask("G1 collection"), _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _root_processor(root_processor), _terminator(n_workers, _queues), @@ -4499,7 +4516,8 @@ void work(uint worker_id) { if (worker_id >= _n_workers) return; // no work needed this round - _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime()); + double start_sec = os::elapsedTime(); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, start_sec); { ResourceMark rm; @@ -4507,23 +4525,24 @@ ReferenceProcessor* rp = _g1h->ref_processor_stw(); - G1ParScanThreadState pss(_g1h, worker_id, rp); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(rp); bool only_young = _g1h->collector_state()->gcs_are_young(); // Non-IM young GC. - G1ParCopyClosure scan_only_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_only_root_cl(_g1h, pss, rp); G1CLDClosure scan_only_cld_cl(&scan_only_root_cl, only_young, // Only process dirty klasses. false); // No need to claim CLDs. // IM young GC. // Strong roots closures. - G1ParCopyClosure scan_mark_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_mark_root_cl(_g1h, pss, rp); G1CLDClosure scan_mark_cld_cl(&scan_mark_root_cl, false, // Process all klasses. true); // Need to claim CLDs. // Weak roots closures. - G1ParCopyClosure scan_mark_weak_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_mark_weak_root_cl(_g1h, pss, rp); G1CLDClosure scan_mark_weak_cld_cl(&scan_mark_weak_root_cl, false, // Process all klasses. true); // Need to claim CLDs. 
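The hunk above moves termination timing out of G1ParScanThreadState and into G1ParEvacuateFollowersClosure itself: every call to offer_termination() bumps an attempt counter and accumulates the time spent waiting in the terminator. The following is a minimal standalone sketch of that accumulate-around-a-call pattern, not HotSpot code; it uses std::chrono in place of os::elapsedTime(), and the TerminationTimer/offer_termination_timed names are illustrative only.

// Standalone sketch of the timing pattern introduced above (assumptions noted in the lead-in).
#include <chrono>
#include <cstddef>

class TerminationTimer {
  double _start_term;      // seconds, start of the current termination attempt
  double _term_time;       // accumulated termination wait time in seconds
  size_t _term_attempts;   // number of termination attempts so far

  static double now_sec() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
  }

 public:
  TerminationTimer() : _start_term(0.0), _term_time(0.0), _term_attempts(0) {}

  void start_term_time() { _term_attempts++; _start_term = now_sec(); }
  void end_term_time()   { _term_time += now_sec() - _start_term; }

  double term_time() const     { return _term_time; }
  size_t term_attempts() const { return _term_attempts; }
};

// Usage mirroring offer_termination(): only the wait is timed, every attempt is counted.
bool offer_termination_timed(TerminationTimer& t, bool (*offer)()) {
  t.start_term_time();
  const bool finished = offer();
  t.end_term_time();
  return finished;
}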
@@ -4554,8 +4573,7 @@ weak_cld_cl = &scan_only_cld_cl; } - pss.start_strong_roots(); - + double start_strong_roots_sec = os::elapsedTime(); _root_processor->evacuate_roots(strong_root_cl, weak_root_cl, strong_cld_cl, @@ -4563,32 +4581,45 @@ trace_metadata, worker_id); - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss); _root_processor->scan_remembered_sets(&push_heap_rs_cl, weak_root_cl, worker_id); - pss.end_strong_roots(); - + double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec; + + double term_sec = 0.0; + size_t evac_term_attempts = 0; { double start = os::elapsedTime(); - G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); + G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, &_terminator); evac.do_void(); + + evac_term_attempts = evac.term_attempts(); + term_sec = evac.term_time(); double elapsed_sec = os::elapsedTime() - start; - double term_sec = pss.term_time(); _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); - _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts()); + _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, evac_term_attempts); } - _g1h->g1_policy()->record_thread_age_table(pss.age_table()); - _g1h->update_surviving_young_words(pss.surviving_young_words()+1); + + assert(pss->queue_is_empty(), "should be empty"); if (PrintTerminationStats) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - pss.print_termination_stats(); + size_t lab_waste; + size_t lab_undo_waste; + pss->waste(lab_waste, lab_undo_waste); + _g1h->print_termination_stats(gclog_or_tty, + worker_id, + (os::elapsedTime() - start_sec) * 1000.0, /* elapsed time */ + strong_roots_sec * 1000.0, /* strong roots time */ + term_sec * 1000.0, /* evac term time */ + evac_term_attempts, /* evac term attempts */ + lab_waste, /* alloc buffer waste */ + lab_undo_waste /* undo waste */ + ); } - assert(pss.queue_is_empty(), "should be empty"); - // Close the inner scope so that the ResourceMark and HandleMark // destructors are executed here and are included as part of the // "GC Worker Time". 
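With the per-worker timing now collected inside work() (elapsed time, strong-roots time, termination time and attempts, plus PLAB waste queried from the scan state), the PrintTerminationStats path hands those raw numbers to G1CollectedHeap::print_termination_stats, shown in the next hunk. The sketch below illustrates, outside of HotSpot, how one printed row is derived from those inputs: times become both milliseconds and a percentage of the worker's elapsed time, and waste in words becomes KiB. The constants are assumptions (a 64-bit VM where a HeapWord is 8 bytes and K is 1024).

// Illustrative only: how one "GC Termination Stats" row is computed from the per-worker numbers.
#include <cstddef>
#include <cstdio>

static const size_t HeapWordSizeBytes = 8;   // assumption: 64-bit HeapWord
static const size_t K = 1024;

void print_worker_row(unsigned worker_id,
                      double elapsed_ms,
                      double strong_roots_ms,
                      double term_ms,
                      size_t term_attempts,
                      size_t alloc_buffer_waste_words,
                      size_t undo_waste_words) {
  // Times are reported absolutely and as a percentage of this worker's elapsed time;
  // waste is reported in KiB, split into alloc-buffer waste and undo waste.
  std::printf("%3u %9.2f %9.2f %6.2f %9.2f %6.2f %8zu %7zu %7zu %7zu\n",
              worker_id,
              elapsed_ms,
              strong_roots_ms, strong_roots_ms * 100.0 / elapsed_ms,
              term_ms,         term_ms * 100.0 / elapsed_ms,
              term_attempts,
              (alloc_buffer_waste_words + undo_waste_words) * HeapWordSizeBytes / K,
              alloc_buffer_waste_words * HeapWordSizeBytes / K,
              undo_waste_words * HeapWordSizeBytes / K);
}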
@@ -4597,6 +4628,31 @@ } }; +void G1CollectedHeap::print_termination_stats_hdr(outputStream* const st) { + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- -------termination------- ------waste (KiB)------"); + st->print_raw_cr("thr ms ms % ms % attempts total alloc undo"); + st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------"); +} + +void G1CollectedHeap::print_termination_stats(outputStream* const st, + uint worker_id, + double elapsed_ms, + double strong_roots_ms, + double term_ms, + size_t term_attempts, + size_t alloc_buffer_waste, + size_t undo_waste) const { + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8) " " + SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), + worker_id, elapsed_ms, strong_roots_ms, strong_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, term_attempts, + (alloc_buffer_waste + undo_waste) * HeapWordSize / K, + alloc_buffer_waste * HeapWordSize / K, + undo_waste * HeapWordSize / K); +} + class G1StringSymbolTableUnlinkTask : public AbstractGangTask { private: BoolObjectClosure* _is_alive; @@ -5125,17 +5181,20 @@ class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor { private: - G1CollectedHeap* _g1h; - RefToScanQueueSet* _queues; - FlexibleWorkGang* _workers; - uint _active_workers; + G1CollectedHeap* _g1h; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; + WorkGang* _workers; + uint _active_workers; public: G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, - FlexibleWorkGang* workers, + G1ParScanThreadState** per_thread_states, + WorkGang* workers, RefToScanQueueSet *task_queues, uint n_workers) : _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _workers(workers), _active_workers(n_workers) @@ -5154,17 +5213,20 @@ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; ProcessTask& _proc_task; G1CollectedHeap* _g1h; - RefToScanQueueSet *_task_queues; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _task_queues; ParallelTaskTerminator* _terminator; public: G1STWRefProcTaskProxy(ProcessTask& proc_task, - G1CollectedHeap* g1h, - RefToScanQueueSet *task_queues, - ParallelTaskTerminator* terminator) : + G1CollectedHeap* g1h, + G1ParScanThreadState** per_thread_states, + RefToScanQueueSet *task_queues, + ParallelTaskTerminator* terminator) : AbstractGangTask("Process reference objects in parallel"), _proc_task(proc_task), _g1h(g1h), + _pss(per_thread_states), _task_queues(task_queues), _terminator(terminator) {} @@ -5176,11 +5238,12 @@ G1STWIsAliveClosure is_alive(_g1h); - G1ParScanThreadState pss(_g1h, worker_id, NULL); - - G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); - - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(NULL); + + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5190,10 +5253,10 @@ } // Keep alive closure. - G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss); // Complete GC closure - G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator); + G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _task_queues, _terminator); // Call the reference processing task's work routine. 
_proc_task.work(worker_id, is_alive, keep_alive, drain_queue); @@ -5212,7 +5275,7 @@ assert(_workers != NULL, "Need parallel worker threads."); ParallelTaskTerminator terminator(_active_workers, _queues); - G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator); + G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator); _workers->run_task(&proc_task_proxy); } @@ -5254,15 +5317,17 @@ class G1ParPreserveCMReferentsTask: public AbstractGangTask { protected: - G1CollectedHeap* _g1h; - RefToScanQueueSet *_queues; + G1CollectedHeap* _g1h; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; ParallelTaskTerminator _terminator; uint _n_workers; public: - G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, uint workers, RefToScanQueueSet *task_queues) : + G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, int workers, RefToScanQueueSet *task_queues) : AbstractGangTask("ParPreserveCMReferents"), _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _terminator(workers, _queues), _n_workers(workers) @@ -5272,12 +5337,13 @@ ResourceMark rm; HandleMark hm; - G1ParScanThreadState pss(_g1h, worker_id, NULL); - assert(pss.queue_is_empty(), "both queue and overflow should be empty"); - - G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); - - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(NULL); + assert(pss->queue_is_empty(), "both queue and overflow should be empty"); + + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5291,7 +5357,7 @@ // Copying keep alive closure. Applied to referent objects that need // to be copied. - G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss); ReferenceProcessor* rp = _g1h->ref_processor_cm(); @@ -5324,15 +5390,15 @@ } // Drain the queue - which may cause stealing - G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator); + G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _queues, &_terminator); drain_queue.do_void(); // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure - assert(pss.queue_is_empty(), "should be"); + assert(pss->queue_is_empty(), "should be"); } }; // Weak Reference processing during an evacuation pause (part 1). -void G1CollectedHeap::process_discovered_references() { +void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** per_thread_states) { double ref_proc_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5362,6 +5428,7 @@ uint no_of_gc_workers = workers()->active_workers(); G1ParPreserveCMReferentsTask keep_cm_referents(this, + per_thread_states, no_of_gc_workers, _task_queues); @@ -5376,16 +5443,17 @@ // JNI refs. // Use only a single queue for this PSS. - G1ParScanThreadState pss(this, 0, NULL); - assert(pss.queue_is_empty(), "pre-condition"); + G1ParScanThreadState* pss = per_thread_states[0]; + pss->set_ref_processor(NULL); + assert(pss->queue_is_empty(), "pre-condition"); // We do not embed a reference processor in the copying/scanning // closures while we're actually processing the discovered // reference objects. 
- G1ParScanExtRootClosure only_copy_non_heap_cl(this, &pss, NULL); - - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL); + G1ParScanExtRootClosure only_copy_non_heap_cl(this, pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5395,10 +5463,10 @@ } // Keep alive closure. - G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, pss); // Serial Complete GC closure - G1STWDrainQueueClosure drain_queue(this, &pss); + G1STWDrainQueueClosure drain_queue(this, pss); // Setup the soft refs policy... rp->setup_policy(false); @@ -5417,7 +5485,7 @@ assert(rp->num_q() == no_of_gc_workers, "sanity"); assert(no_of_gc_workers <= rp->max_num_q(), "sanity"); - G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers); + G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, no_of_gc_workers); stats = rp->process_discovered_references(&is_alive, &keep_alive, &drain_queue, @@ -5429,14 +5497,14 @@ _gc_tracer_stw->report_gc_reference_stats(stats); // We have completed copying any necessary live referent objects. - assert(pss.queue_is_empty(), "both queue and overflow should be empty"); + assert(pss->queue_is_empty(), "both queue and overflow should be empty"); double ref_proc_time = os::elapsedTime() - ref_proc_start; g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0); } // Weak Reference processing during an evacuation pause (part 2). -void G1CollectedHeap::enqueue_discovered_references() { +void G1CollectedHeap::enqueue_discovered_references(G1ParScanThreadState** per_thread_states) { double ref_enq_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5455,7 +5523,7 @@ assert(rp->num_q() == n_workers, "sanity"); assert(n_workers <= rp->max_num_q(), "sanity"); - G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers); + G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, n_workers); rp->enqueue_discovered_references(&par_task_executor); } @@ -5491,9 +5559,14 @@ double start_par_time_sec = os::elapsedTime(); double end_par_time_sec; + G1ParScanThreadState** per_thread_states = NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC); + for (uint i = 0; i < n_workers; i++) { + per_thread_states[i] = new_par_scan_state(i); + } + { G1RootProcessor root_processor(this, n_workers); - G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers); + G1ParTask g1_par_task(this, per_thread_states, _task_queues, &root_processor, n_workers); // InitialMark needs claim bits to keep track of the marked-through CLDs. if (collector_state()->during_initial_mark_pause()) { ClassLoaderDataGraph::clear_claimed_marks(); @@ -5501,7 +5574,7 @@ // The individual threads will set their evac-failure closures. if (PrintTerminationStats) { - G1ParScanThreadState::print_termination_stats_hdr(); + print_termination_stats_hdr(gclog_or_tty); } workers()->run_task(&g1_par_task); @@ -5528,7 +5601,7 @@ // as we may have to copy some 'reachable' referent // objects (and their reachable sub-graphs) that were // not copied during the pause. 
- process_discovered_references(); + process_discovered_references(per_thread_states); if (G1StringDedup::is_enabled()) { double fixup_start = os::elapsedTime(); @@ -5544,6 +5617,14 @@ _allocator->release_gc_alloc_regions(evacuation_info); g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + for (uint i = 0; i < n_workers; i++) { + G1ParScanThreadState* pss = per_thread_states[i]; + delete pss; + } + FREE_C_HEAP_ARRAY(G1ParScanThreadState*, per_thread_states); + + record_obj_copy_mem_stats(); + // Reset and re-enable the hot card cache. // Note the counts for the cards in the regions in the // collection set are reset when the collection set is freed. @@ -5568,12 +5649,17 @@ // will log these updates (and dirty their associated // cards). We need these updates logged to update any // RSets. - enqueue_discovered_references(); + enqueue_discovered_references(per_thread_states); redirty_logged_cards(); COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } +void G1CollectedHeap::record_obj_copy_mem_stats() { + _gc_tracer_stw->report_evacuation_statistics(create_g1_evac_summary(&_survivor_evac_stats), + create_g1_evac_summary(&_old_evac_stats)); +} + void G1CollectedHeap::free_region(HeapRegion* hr, FreeRegionList* free_list, bool par, @@ -5972,6 +6058,11 @@ cur->set_evacuation_failed(false); // The region is now considered to be old. cur->set_old(); + // Do some allocation statistics accounting. Regions that failed evacuation + // are always made old, so there is no need to update anything in the young + // gen statistics, but we need to update old gen statistics. + size_t used_words = cur->marked_bytes() / HeapWordSize; + _old_evac_stats.add_failure_used_and_waste(used_words, HeapRegion::GrainWords - used_words); _old_set.add(cur); evacuation_info.increment_collectionset_used_after(cur->used()); } @@ -6217,6 +6308,10 @@ } } +bool G1CollectedHeap::is_old_gc_alloc_region(HeapRegion* hr) { + return _allocator->is_retained_old_region(hr); +} + void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) { _young_list->push_region(hr); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -28,12 +28,12 @@ #include "gc/g1/concurrentMark.hpp" #include "gc/g1/evacuationInfo.hpp" #include "gc/g1/g1AllocationContext.hpp" -#include "gc/g1/g1Allocator.hpp" #include "gc/g1/g1BiasedArray.hpp" #include "gc/g1/g1CollectorState.hpp" #include "gc/g1/g1HRPrinter.hpp" #include "gc/g1/g1InCSetState.hpp" #include "gc/g1/g1MonitoringSupport.hpp" +#include "gc/g1/g1EvacStats.hpp" #include "gc/g1/g1SATBCardTableModRefBS.hpp" #include "gc/g1/g1YCTypes.hpp" #include "gc/g1/hSpaceCounters.hpp" @@ -41,6 +41,7 @@ #include "gc/g1/heapRegionSet.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/collectedHeap.hpp" +#include "gc/shared/plab.hpp" #include "memory/memRegion.hpp" #include "utilities/stack.hpp" @@ -54,6 +55,7 @@ class HRRSCleanupTask; class GenerationSpec; class OopsInHeapRegionClosure; +class G1ParScanThreadState; class G1KlassScanClosure; class G1ParScanThreadState; class ObjectClosure; @@ -75,7 +77,9 @@ class EvacuationFailedInfo; class nmethod; class Ticks; -class FlexibleWorkGang; +class WorkGang; +class G1Allocator; +class G1ArchiveAllocator; typedef OverflowTaskQueue RefToScanQueue; typedef GenericTaskQueueSet RefToScanQueueSet; @@ -184,8 +188,7 @@ friend class 
VM_G1IncCollectionPause; friend class VMStructs; friend class MutatorAllocRegion; - friend class SurvivorGCAllocRegion; - friend class OldGCAllocRegion; + friend class G1GCAllocRegion; // Closures used in implementation. friend class G1ParScanThreadState; @@ -200,7 +203,7 @@ friend class G1CheckCSetFastTableClosure; private: - FlexibleWorkGang* _workers; + WorkGang* _workers; static size_t _humongous_object_threshold_in_words; @@ -245,7 +248,7 @@ // The sequence of all heap regions in the heap. HeapRegionManager _hrm; - // Handles non-humongous allocations in the G1CollectedHeap. + // Manages all allocations with regions except humongous object allocations. G1Allocator* _allocator; // Outside of GC pauses, the number of bytes used in all regions other @@ -263,11 +266,11 @@ // Statistics for each allocation context AllocationContextStats _allocation_context_stats; - // PLAB sizing policy for survivors. - PLABStats _survivor_plab_stats; + // GC allocation statistics policy for survivors. + G1EvacStats _survivor_evac_stats; - // PLAB sizing policy for tenured objects. - PLABStats _old_plab_stats; + // GC allocation statistics policy for tenured objects. + G1EvacStats _old_evac_stats; // It specifies whether we should attempt to expand the heap after a // region allocation failure. If heap expansion fails we set this to @@ -581,14 +584,14 @@ // Process any reference objects discovered during // an incremental evacuation pause. - void process_discovered_references(); + void process_discovered_references(G1ParScanThreadState** per_thread_states); // Enqueue any remaining discovered references // after processing. - void enqueue_discovered_references(); + void enqueue_discovered_references(G1ParScanThreadState** per_thread_states); public: - FlexibleWorkGang* workers() const { return _workers; } + WorkGang* workers() const { return _workers; } G1Allocator* allocator() { return _allocator; @@ -606,7 +609,7 @@ bool expand(size_t expand_bytes); // Returns the PLAB statistics for a given destination. - inline PLABStats* alloc_buffer_stats(InCSetState dest); + inline G1EvacStats* alloc_buffer_stats(InCSetState dest); // Determines PLAB size for a given destination. inline size_t desired_plab_sz(InCSetState dest); @@ -680,6 +683,9 @@ // Allocates a new heap region instance. HeapRegion* new_heap_region(uint hrs_index, MemRegion mr); + // Allocates a new per thread par scan state for the given thread id. + G1ParScanThreadState* new_par_scan_state(uint worker_id); + // Allocate the highest free region in the reserved heap. This will commit // regions as necessary. HeapRegion* alloc_highest_free_region(); @@ -789,6 +795,20 @@ // Actually do the work of evacuating the collection set. void evacuate_collection_set(EvacuationInfo& evacuation_info); + // Print the header for the per-thread termination statistics. + static void print_termination_stats_hdr(outputStream* const st); + // Print actual per-thread termination statistics. + void print_termination_stats(outputStream* const st, + uint worker_id, + double elapsed_ms, + double strong_roots_ms, + double term_ms, + size_t term_attempts, + size_t alloc_buffer_waste, + size_t undo_waste) const; + // Update object copying statistics. + void record_obj_copy_mem_stats(); + // The g1 remembered set of the heap. G1RemSet* _g1_rem_set; @@ -1195,9 +1215,7 @@ // Determine whether the given region is one that we are using as an // old GC alloc region. 
- bool is_old_gc_alloc_region(HeapRegion* hr) { - return _allocator->is_retained_old_region(hr); - } + bool is_old_gc_alloc_region(HeapRegion* hr); // Perform a collection of the heap; intended for use in implementing // "System.gc". This probably implies as full a collection as the @@ -1566,6 +1584,7 @@ const VerifyOption vo) const; G1HeapSummary create_g1_heap_summary(); + G1EvacSummary create_g1_evac_summary(G1EvacStats* stats); // Printing diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -35,12 +35,12 @@ #include "gc/shared/taskqueue.hpp" #include "runtime/orderAccess.inline.hpp" -PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) { +G1EvacStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) { switch (dest.value()) { case InCSetState::Young: - return &_survivor_plab_stats; + return &_survivor_evac_stats; case InCSetState::Old: - return &_old_plab_stats; + return &_old_evac_stats; default: ShouldNotReachHere(); return NULL; // Keep some compilers happy diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "gc/g1/g1CollectedHeap.hpp" +#include "gc/g1/g1ParScanThreadState.hpp" #include "gc/g1/heapRegion.inline.hpp" bool G1CollectedHeap::copy_allocation_context_stats(const jint* contexts, @@ -37,3 +38,7 @@ MemRegion mr) { return new HeapRegion(hrs_index, bot_shared(), mr); } + +G1ParScanThreadState* G1CollectedHeap::new_par_scan_state(uint worker_id) { + return new G1ParScanThreadState(this, worker_id); +} diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp --- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1582,7 +1582,7 @@ G1CollectorPolicy::record_concurrent_mark_cleanup_end() { _collectionSetChooser->clear(); - FlexibleWorkGang* workers = _g1->workers(); + WorkGang* workers = _g1->workers(); uint n_workers = workers->active_workers(); uint n_regions = _g1->num_regions(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp --- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -26,8 +26,8 @@ #define SHARE_VM_GC_G1_G1COLLECTORPOLICY_HPP #include "gc/g1/collectionSetChooser.hpp" -#include "gc/g1/g1Allocator.hpp" #include "gc/g1/g1CollectorState.hpp" +#include "gc/g1/g1InCSetState.hpp" #include "gc/g1/g1MMUTracker.hpp" #include "gc/shared/collectorPolicy.hpp" diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1CollectorPolicy_ext.hpp --- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy_ext.hpp Thu Sep 03 14:24:41 2015 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP -#define SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP - -#include "gc/g1/g1CollectorPolicy.hpp" - -class G1CollectorPolicyExt : public G1CollectorPolicy { }; - -#endif // SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1EvacStats.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1EvacStats.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/g1/g1EvacStats.hpp" +#include "gc/shared/gcId.hpp" +#include "trace/tracing.hpp" + +void G1EvacStats::adjust_desired_plab_sz() { + if (PrintPLAB) { + gclog_or_tty->print(" (allocated = " SIZE_FORMAT " wasted = " SIZE_FORMAT " " + "unused = " SIZE_FORMAT " used = " SIZE_FORMAT " " + "undo_waste = " SIZE_FORMAT " region_end_waste = " SIZE_FORMAT " " + "regions filled = %u direct_allocated = " SIZE_FORMAT " " + "failure_used = " SIZE_FORMAT " failure_waste = " SIZE_FORMAT ") ", + _allocated, _wasted, _unused, used(), _undo_wasted, _region_end_waste, + _regions_filled, _direct_allocated, _failure_used, _failure_waste); + } + + if (ResizePLAB) { + + assert(is_object_aligned(max_size()) && min_size() <= max_size(), + "PLAB clipping computation may be incorrect"); + + if (_allocated == 0) { + assert((_unused == 0), + err_msg("Inconsistency in PLAB stats: " + "_allocated: "SIZE_FORMAT", " + "_wasted: "SIZE_FORMAT", " + "_region_end_waste: "SIZE_FORMAT", " + "_unused: "SIZE_FORMAT", " + "_used : "SIZE_FORMAT, + _allocated, _wasted, _region_end_waste, _unused, used())); + _allocated = 1; + } + // We account region end waste fully to PLAB allocation. This is not completely fair, + // but is a conservative assumption because PLABs may be sized flexibly while we + // cannot adjust direct allocations. + // In some cases, wasted_frac may become > 1 but that just reflects the problem + // with region_end_waste. + double wasted_frac = (double)(_unused + _wasted + _region_end_waste) / (double)_allocated; + size_t target_refills = (size_t)((wasted_frac * TargetSurvivorRatio) / TargetPLABWastePct); + if (target_refills == 0) { + target_refills = 1; + } + size_t cur_plab_sz = used() / target_refills; + // Take historical weighted average + _filter.sample(cur_plab_sz); + // Clip from above and below, and align to object boundary + size_t plab_sz; + plab_sz = MAX2(min_size(), (size_t)_filter.average()); + plab_sz = MIN2(max_size(), plab_sz); + plab_sz = align_object_size(plab_sz); + // Latch the result + _desired_net_plab_sz = plab_sz; + if (PrintPLAB) { + gclog_or_tty->print_cr(" (plab_sz = " SIZE_FORMAT " desired_plab_sz = " SIZE_FORMAT ") ", cur_plab_sz, plab_sz); + } + } + // Clear accumulators for next round. + reset(); +} + diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1EvacStats.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1EvacStats.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_gc_G1_G1EVACSTATS_HPP +#define SHARE_VM_gc_G1_G1EVACSTATS_HPP + +#include "gc/shared/plab.hpp" +#include "runtime/atomic.hpp" + +// Records various memory allocation statistics gathered during evacuation. +class G1EvacStats : public PLABStats { + private: + size_t _region_end_waste; // Number of words wasted due to skipping to the next region. + uint _regions_filled; // Number of regions filled completely. + size_t _direct_allocated; // Number of words allocated directly into the regions. + + // Number of words in live objects remaining in regions that ultimately suffered an + // evacuation failure. This is used in the regions when the regions are made old regions. + size_t _failure_used; + // Number of words wasted in regions which failed evacuation. This is the sum of space + // for objects successfully copied out of the regions (now dead space) plus waste at the + // end of regions. + size_t _failure_waste; + + virtual void reset() { + PLABStats::reset(); + _region_end_waste = 0; + _regions_filled = 0; + _direct_allocated = 0; + _failure_used = 0; + _failure_waste = 0; + } + + public: + G1EvacStats(size_t desired_plab_sz_, unsigned wt) : PLABStats(desired_plab_sz_, wt), + _region_end_waste(0), _regions_filled(0), _direct_allocated(0), + _failure_used(0), _failure_waste(0) { + } + + virtual void adjust_desired_plab_sz(); + + size_t allocated() const { return _allocated; } + size_t wasted() const { return _wasted; } + size_t unused() const { return _unused; } + size_t used() const { return allocated() - (wasted() + unused()); } + size_t undo_wasted() const { return _undo_wasted; } + + uint regions_filled() const { return _regions_filled; } + size_t region_end_waste() const { return _region_end_waste; } + size_t direct_allocated() const { return _direct_allocated; } + + // Amount of space in heapwords used in the failing regions when an evacuation failure happens. + size_t failure_used() const { return _failure_used; } + // Amount of space in heapwords wasted (unused) in the failing regions when an evacuation failure happens. 
+ size_t failure_waste() const { return _failure_waste; } + + void add_direct_allocated(size_t value) { + Atomic::add_ptr(value, &_direct_allocated); + } + + void add_region_end_waste(size_t value) { + Atomic::add_ptr(value, &_region_end_waste); + Atomic::add_ptr(1, &_regions_filled); + } + + void add_failure_used_and_waste(size_t used, size_t waste) { + Atomic::add_ptr(used, &_failure_used); + Atomic::add_ptr(waste, &_failure_waste); + } +}; + +#endif // SHARE_VM_gc_G1_G1EVACSTATS_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1OopClosures.cpp --- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -32,7 +32,11 @@ G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL), - _cm(_g1->concurrent_mark()) {} + _cm(_g1->concurrent_mark()) { } + +G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1) : + G1ParClosureSuper(g1), _scanned_klass(NULL), + _cm(_g1->concurrent_mark()) { } G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1) : _g1(g1), _par_scan_state(NULL), _worker_id(UINT_MAX) { } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1OopClosures.hpp --- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -76,15 +76,13 @@ class G1ParScanClosure : public G1ParClosureSuper { public: - G1ParScanClosure(G1CollectedHeap* g1, ReferenceProcessor* rp) : - G1ParClosureSuper(g1) { - assert(_ref_processor == NULL, "sanity"); - _ref_processor = rp; - } + G1ParScanClosure(G1CollectedHeap* g1) : G1ParClosureSuper(g1) { } template void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + + void set_ref_processor(ReferenceProcessor* ref_processor) { _ref_processor = ref_processor; } }; // Add back base class for metadata @@ -104,6 +102,7 @@ void mark_forwarded_object(oop from_obj, oop to_obj); public: G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); + G1ParCopyHelper(G1CollectedHeap* g1); void set_scanned_klass(Klass* k) { _scanned_klass = k; } template void do_klass_barrier(T* p, oop new_obj); @@ -132,6 +131,10 @@ assert(_ref_processor == NULL, "sanity"); } + G1ParCopyClosure(G1CollectedHeap* g1) : G1ParCopyHelper(g1) { + assert(_ref_processor == NULL, "sanity"); + } + template void do_oop_nv(T* p) { do_oop_work(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp --- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "gc/g1/g1Allocator.inline.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1OopClosures.inline.hpp" #include "gc/g1/g1ParScanThreadState.inline.hpp" @@ -31,17 +32,19 @@ #include "oops/oop.inline.hpp" #include "runtime/prefetch.inline.hpp" -G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, ReferenceProcessor* rp) +G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id) : _g1h(g1h), _refs(g1h->task_queue(worker_id)), _dcq(&g1h->dirty_card_queue_set()), 
_ct_bs(g1h->g1_barrier_set()), _g1_rem(g1h->g1_rem_set()), - _hash_seed(17), _worker_id(worker_id), - _term_attempts(0), + _hash_seed(17), + _worker_id(worker_id), _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()), - _age_table(false), _scanner(g1h, rp), - _strong_roots_time(0), _term_time(0) { + _age_table(false), + _scanner(g1h), + _old_gen_is_full(false) +{ _scanner.set_par_scan_thread_state(this); // we allocate G1YoungSurvRateNumRegions plus one entries, since // we "sacrifice" entry 0 to keep track of surviving bytes for @@ -66,38 +69,20 @@ // need to be moved to the next space. _dest[InCSetState::Young] = InCSetState::Old; _dest[InCSetState::Old] = InCSetState::Old; - - _start = os::elapsedTime(); } G1ParScanThreadState::~G1ParScanThreadState() { - _plab_allocator->retire_alloc_buffers(); + // Update allocation statistics. + _plab_allocator->flush_and_retire_stats(); delete _plab_allocator; + _g1h->g1_policy()->record_thread_age_table(&_age_table); + // Update heap statistics. + _g1h->update_surviving_young_words(_surviving_young_words); FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base); } -void G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st) { - st->print_raw_cr("GC Termination Stats"); - st->print_raw_cr(" elapsed --strong roots-- -------termination------- ------waste (KiB)------"); - st->print_raw_cr("thr ms ms % ms % attempts total alloc undo"); - st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------"); -} - -void G1ParScanThreadState::print_termination_stats(outputStream* const st) const { - const double elapsed_ms = elapsed_time() * 1000.0; - const double s_roots_ms = strong_roots_time() * 1000.0; - const double term_ms = term_time() * 1000.0; - size_t alloc_buffer_waste = 0; - size_t undo_waste = 0; - _plab_allocator->waste(alloc_buffer_waste, undo_waste); - st->print_cr("%3u %9.2f %9.2f %6.2f " - "%9.2f %6.2f " SIZE_FORMAT_W(8) " " - SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), - _worker_id, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, - term_ms, term_ms * 100 / elapsed_ms, term_attempts(), - (alloc_buffer_waste + undo_waste) * HeapWordSize / K, - alloc_buffer_waste * HeapWordSize / K, - undo_waste * HeapWordSize / K); +void G1ParScanThreadState::waste(size_t& wasted, size_t& undo_wasted) { + _plab_allocator->waste(wasted, undo_wasted); } #ifdef ASSERT @@ -152,26 +137,38 @@ HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state, InCSetState* dest, size_t word_sz, - AllocationContext_t const context) { + AllocationContext_t const context, + bool previous_plab_refill_failed) { assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value())); assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value())); // Right now we only have two types of regions (young / old) so // let's keep the logic here simple. We can generalize it when necessary. if (dest->is_young()) { + bool plab_refill_in_old_failed = false; HeapWord* const obj_ptr = _plab_allocator->allocate(InCSetState::Old, word_sz, - context); - if (obj_ptr == NULL) { - return NULL; - } + context, + &plab_refill_in_old_failed); // Make sure that we won't attempt to copy any other objects out // of a survivor region (given that apparently we cannot allocate - // any new ones) to avoid coming into this slow path. 
- _tenuring_threshold = 0; - dest->set_old(); + // any new ones) to avoid coming into this slow path again and again. + // Only consider failed PLAB refill here: failed inline allocations are + // typically large, so not indicative of remaining space. + if (previous_plab_refill_failed) { + _tenuring_threshold = 0; + } + + if (obj_ptr != NULL) { + dest->set_old(); + } else { + // We just failed to allocate in old gen. The same idea as explained above + // for making survivor gen unavailable for allocation applies for old gen. + _old_gen_is_full = plab_refill_in_old_failed; + } return obj_ptr; } else { + _old_gen_is_full = previous_plab_refill_failed; assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value())); // no other space to try. return NULL; @@ -202,14 +199,20 @@ uint age = 0; InCSetState dest_state = next_state(state, old_mark, age); + // The second clause is to prevent premature evacuation failure in case there + // is still space in survivor, but old gen is full. + if (_old_gen_is_full && dest_state.is_old()) { + return handle_evacuation_failure_par(old, old_mark); + } HeapWord* obj_ptr = _plab_allocator->plab_allocate(dest_state, word_sz, context); // PLAB allocations should succeed most of the time, so we'll // normally check against NULL once and that's it. if (obj_ptr == NULL) { - obj_ptr = _plab_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context); + bool plab_refill_failed = false; + obj_ptr = _plab_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context, &plab_refill_failed); if (obj_ptr == NULL) { - obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context); + obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context, plab_refill_failed); if (obj_ptr == NULL) { // This will either forward-to-self, or detect that someone else has // installed a forwarding pointer. @@ -253,7 +256,7 @@ } else { obj->set_mark(old_mark->set_age(age)); } - age_table()->add(age, word_sz); + _age_table.add(age, word_sz); } else { obj->set_mark(old_mark); } @@ -271,8 +274,7 @@ obj); } - size_t* const surv_young_words = surviving_young_words(); - surv_young_words[young_index] += word_sz; + _surviving_young_words[young_index] += word_sz; if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) { // We keep track of the next start index in the length field of diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp --- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -35,16 +35,17 @@ #include "memory/allocation.hpp" #include "oops/oop.hpp" +class G1PLABAllocator; class HeapRegion; class outputStream; -class G1ParScanThreadState : public StackObj { +class G1ParScanThreadState : public CHeapObj { private: G1CollectedHeap* _g1h; RefToScanQueue* _refs; DirtyCardQueue _dcq; G1SATBCardTableModRefBS* _ct_bs; - G1RemSet* _g1_rem; + G1RemSet* _g1_rem; G1PLABAllocator* _plab_allocator; @@ -57,20 +58,16 @@ int _hash_seed; uint _worker_id; - size_t _term_attempts; - - double _start; - double _start_strong_roots; - double _strong_roots_time; - double _start_term; - double _term_time; - // Map from young-age-index (0 == not young, 1 is youngest) to // surviving words. 
base is what we get back from the malloc call size_t* _surviving_young_words_base; // this points into the array, as we use the first few entries for padding size_t* _surviving_young_words; + // Indicates whether in the last generation (old) there is no more space + // available for allocation. + bool _old_gen_is_full; + #define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t)) DirtyCardQueue& dirty_card_queue() { return _dcq; } @@ -85,10 +82,10 @@ } public: - G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, ReferenceProcessor* rp); + G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id); ~G1ParScanThreadState(); - ageTable* age_table() { return &_age_table; } + void set_ref_processor(ReferenceProcessor* rp) { _scanner.set_ref_processor(rp); } #ifdef ASSERT bool queue_is_empty() const { return _refs->is_empty(); } @@ -114,40 +111,14 @@ uint worker_id() { return _worker_id; } - size_t term_attempts() const { return _term_attempts; } - void note_term_attempt() { _term_attempts++; } - - void start_strong_roots() { - _start_strong_roots = os::elapsedTime(); - } - void end_strong_roots() { - _strong_roots_time += (os::elapsedTime() - _start_strong_roots); - } - double strong_roots_time() const { return _strong_roots_time; } - - void start_term_time() { - note_term_attempt(); - _start_term = os::elapsedTime(); - } - void end_term_time() { - _term_time += (os::elapsedTime() - _start_term); - } - double term_time() const { return _term_time; } - - double elapsed_time() const { - return os::elapsedTime() - _start; - } - - // Print the header for the per-thread termination statistics. - static void print_termination_stats_hdr(outputStream* const st = gclog_or_tty); - - // Print actual per-thread termination statistics. - void print_termination_stats(outputStream* const st = gclog_or_tty) const; + // Returns the current amount of waste due to alignment or not being able to fit + // objects within LABs and the undo waste. + virtual void waste(size_t& wasted, size_t& undo_wasted); size_t* surviving_young_words() { - // We add on to hide entry 0 which accumulates surviving words for + // We add one to hide entry 0 which accumulates surviving words for // age -1 regions (i.e. non-young ones) - return _surviving_young_words; + return _surviving_young_words + 1; } private: @@ -190,12 +161,16 @@ // Tries to allocate word_sz in the PLAB of the next "generation" after trying to // allocate into dest. State is the original (source) cset state for the object - // that is allocated for. + // that is allocated for. Previous_plab_refill_failed indicates whether previously + // a PLAB refill into "state" failed. // Returns a non-NULL pointer if successful, and updates dest if required. + // Also determines whether we should continue to try to allocate into the various + // generations or just end trying to allocate. 
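Illustrative sketch (not part of this changeset): the fallback policy described above, restated as standalone C++ with hypothetical stand-ins for InCSetState and the PLAB allocator. A previously failed PLAB refill, not a failed large direct allocation, is the signal used to stop copying into survivor space and to record that the old generation is full.

#include <cstddef>

// Hypothetical simplified types; the real code uses InCSetState and G1PLABAllocator.
enum class Dest { Young, Old };

struct CopyContext {
  bool old_gen_is_full = false;
  unsigned tenuring_threshold = 15;
};

// Mirrors the decision flow of allocate_in_next_plab(): if the object was headed for
// survivor space, retry the copy into old; only a failed PLAB refill is taken as
// evidence that a space is effectively exhausted.
void* allocate_in_next_space(Dest& dest, size_t word_sz, bool previous_plab_refill_failed,
                             CopyContext& ctx,
                             void* (*plab_allocate)(Dest, size_t, bool* refill_failed)) {
  if (dest == Dest::Young) {
    bool refill_in_old_failed = false;
    void* obj = plab_allocate(Dest::Old, word_sz, &refill_in_old_failed);
    if (previous_plab_refill_failed) {
      ctx.tenuring_threshold = 0;   // stop copying further objects into survivor regions
    }
    if (obj != nullptr) {
      dest = Dest::Old;             // the object will be evacuated to old instead
    } else {
      ctx.old_gen_is_full = refill_in_old_failed;
    }
    return obj;
  }
  // dest was already Old: there is no other space left to try.
  ctx.old_gen_is_full = previous_plab_refill_failed;
  return nullptr;
}
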
HeapWord* allocate_in_next_plab(InCSetState const state, InCSetState* dest, size_t word_sz, - AllocationContext_t const context); + AllocationContext_t const context, + bool previous_plab_refill_failed); inline InCSetState next_state(InCSetState const state, markOop const m, uint& age); public: diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -115,7 +115,7 @@ G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h, uint n_workers) : _g1h(g1h), - _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)), + _process_strong_tasks(G1RP_PS_NumElements), _srs(n_workers), _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never), _n_workers_discovered_strong_classes(0) {} @@ -158,7 +158,7 @@ { // Now the CM ref_processor roots. G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CMRefRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_refProcessor_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_refProcessor_oops_do)) { // We need to treat the discovered reference lists of the // concurrent mark ref processor as roots and keep entries // (which are added by the marking threads) on them live @@ -201,12 +201,12 @@ // as implicitly live). { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->collector_state()->mark_in_progress()) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->collector_state()->mark_in_progress()) { JavaThread::satb_mark_queue_set().filter_thread_buffers(); } } - _process_strong_tasks->all_tasks_completed(n_workers()); + _process_strong_tasks.all_tasks_completed(n_workers()); } void G1RootProcessor::process_strong_roots(OopClosure* oops, @@ -216,7 +216,7 @@ process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0); process_vm_roots(oops, NULL, NULL, 0); - _process_strong_tasks->all_tasks_completed(n_workers()); + _process_strong_tasks.all_tasks_completed(n_workers()); } void G1RootProcessor::process_all_roots(OopClosure* oops, @@ -226,11 +226,11 @@ process_java_roots(oops, NULL, clds, clds, NULL, NULL, 0); process_vm_roots(oops, oops, NULL, 0); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_CodeCache_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_CodeCache_oops_do)) { CodeCache::blobs_do(blobs); } - _process_strong_tasks->all_tasks_completed(n_workers()); + _process_strong_tasks.all_tasks_completed(n_workers()); } void G1RootProcessor::process_java_roots(OopClosure* strong_roots, @@ -246,7 +246,7 @@ // let the thread process the weak CLDs and nmethods. 
{ G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) { ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds); } } @@ -264,49 +264,49 @@ uint worker_i) { { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::UniverseRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Universe_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_Universe_oops_do)) { Universe::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JNIRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_JNIHandles_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_JNIHandles_oops_do)) { JNIHandles::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ObjectSynchronizerRoots, worker_i); - if (!_process_strong_tasks-> is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) { ObjectSynchronizer::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::FlatProfilerRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) { FlatProfiler::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ManagementRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Management_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_Management_oops_do)) { Management::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JVMTIRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_jvmti_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_jvmti_oops_do)) { JvmtiExport::oops_do(strong_roots); } } { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SystemDictionaryRoots, worker_i); - if (!_process_strong_tasks->is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) { SystemDictionary::roots_oops_do(strong_roots, weak_roots); } } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -45,7 +45,7 @@ // worker thread call the process_roots methods. class G1RootProcessor : public StackObj { G1CollectedHeap* _g1h; - SubTasksDone* _process_strong_tasks; + SubTasksDone _process_strong_tasks; StrongRootsScope _srs; // Used to implement the Thread work barrier. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/heapRegion.hpp --- a/hotspot/src/share/vm/gc/g1/heapRegion.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/heapRegion.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -109,7 +109,7 @@ // evacuation pauses between two cleanups, which is _highly_ unlikely. class G1OffsetTableContigSpace: public CompactibleSpace { friend class VMStructs; - HeapWord* _top; + HeapWord* volatile _top; HeapWord* volatile _scan_top; protected: G1BlockOffsetArrayContigSpace _offsets; @@ -134,10 +134,18 @@ // Reset the G1OffsetTableContigSpace. 
virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space); - HeapWord** top_addr() { return &_top; } - // Allocation helpers (return NULL if full). - inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value); - inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value); + HeapWord* volatile* top_addr() { return &_top; } + // Try to allocate at least min_word_size and up to desired_size from this Space. + // Returns NULL if not possible, otherwise sets actual_word_size to the amount of + // space allocated. + // This version assumes that all allocation requests to this Space are properly + // synchronized. + inline HeapWord* allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size); + // Try to allocate at least min_word_size and up to desired_size from this Space. + // Returns NULL if not possible, otherwise sets actual_word_size to the amount of + // space allocated. + // This version synchronizes with other calls to par_allocate_impl(). + inline HeapWord* par_allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size); public: void reset_after_compaction() { set_top(compaction_top()); } @@ -179,9 +187,14 @@ HeapWord* block_start(const void* p); HeapWord* block_start_const(const void* p) const; - // Add offset table update. + // Allocation (return NULL if full). Assumes the caller has established + // mutually exclusive access to the space. + HeapWord* allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size); + // Allocation (return NULL if full). Enforces mutual exclusion internally. + HeapWord* par_allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size); + virtual HeapWord* allocate(size_t word_size); - HeapWord* par_allocate(size_t word_size); + virtual HeapWord* par_allocate(size_t word_size); HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; } @@ -351,8 +364,9 @@ // Override for scan_and_forward support. void prepare_for_compaction(CompactPoint* cp); - inline HeapWord* par_allocate_no_bot_updates(size_t word_size); + inline HeapWord* par_allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* word_size); inline HeapWord* allocate_no_bot_updates(size_t word_size); + inline HeapWord* allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* actual_size); // If this region is a member of a HeapRegionManager, the index in that // sequence, otherwise -1. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/heapRegion.inline.hpp --- a/hotspot/src/share/vm/gc/g1/heapRegion.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/heapRegion.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -32,33 +32,39 @@ #include "oops/oop.inline.hpp" #include "runtime/atomic.inline.hpp" -// This version requires locking. 
-inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size, - HeapWord* const end_value) { +inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t min_word_size, + size_t desired_word_size, + size_t* actual_size) { HeapWord* obj = top(); - if (pointer_delta(end_value, obj) >= size) { - HeapWord* new_top = obj + size; + size_t available = pointer_delta(end(), obj); + size_t want_to_allocate = MIN2(available, desired_word_size); + if (want_to_allocate >= min_word_size) { + HeapWord* new_top = obj + want_to_allocate; set_top(new_top); assert(is_aligned(obj) && is_aligned(new_top), "checking alignment"); + *actual_size = want_to_allocate; return obj; } else { return NULL; } } -// This version is lock-free. -inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size, - HeapWord* const end_value) { +inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t min_word_size, + size_t desired_word_size, + size_t* actual_size) { do { HeapWord* obj = top(); - if (pointer_delta(end_value, obj) >= size) { - HeapWord* new_top = obj + size; + size_t available = pointer_delta(end(), obj); + size_t want_to_allocate = MIN2(available, desired_word_size); + if (want_to_allocate >= min_word_size) { + HeapWord* new_top = obj + want_to_allocate; HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj); // result can be one of two: // the old top value: the exchange succeeded // otherwise: the new value of the top is returned. if (result == obj) { assert(is_aligned(obj) && is_aligned(new_top), "checking alignment"); + *actual_size = want_to_allocate; return obj; } } else { @@ -67,20 +73,34 @@ } while (true); } -inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) { - HeapWord* res = allocate_impl(size, end()); +inline HeapWord* G1OffsetTableContigSpace::allocate(size_t min_word_size, + size_t desired_word_size, + size_t* actual_size) { + HeapWord* res = allocate_impl(min_word_size, desired_word_size, actual_size); if (res != NULL) { - _offsets.alloc_block(res, size); + _offsets.alloc_block(res, *actual_size); } return res; } +inline HeapWord* G1OffsetTableContigSpace::allocate(size_t word_size) { + size_t temp; + return allocate(word_size, word_size, &temp); +} + +inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t word_size) { + size_t temp; + return par_allocate(word_size, word_size, &temp); +} + // Because of the requirement of keeping "_offsets" up to date with the // allocations, we sequentialize these with a lock. Therefore, best if // this is used for larger LAB allocations only. 
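Illustrative sketch (not part of this changeset): the new allocate-at-least-min, at-most-desired contract of par_allocate_impl(), written with std::atomic in place of HotSpot's Atomic::cmpxchg_ptr. The CAS loop retries whenever another thread advances top first, and reports the amount actually handed out.

#include <algorithm>
#include <atomic>
#include <cstddef>

typedef unsigned long HeapWordLike;   // stand-in for HeapWord

// Lock-free bump-pointer allocation: take up to desired_word_size words,
// succeeding as long as at least min_word_size words remain in the space.
HeapWordLike* par_allocate_sketch(std::atomic<HeapWordLike*>& top, HeapWordLike* end,
                                  size_t min_word_size, size_t desired_word_size,
                                  size_t* actual_word_size) {
  while (true) {
    HeapWordLike* obj = top.load(std::memory_order_relaxed);
    size_t available = static_cast<size_t>(end - obj);
    size_t want = std::min(available, desired_word_size);
    if (want < min_word_size) {
      return nullptr;                                // not enough space left
    }
    HeapWordLike* new_top = obj + want;
    // CAS publishes the new top; on failure another thread won the race, so retry.
    if (top.compare_exchange_weak(obj, new_top, std::memory_order_relaxed)) {
      *actual_word_size = want;
      return obj;
    }
  }
}
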
-inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) { +inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t min_word_size, + size_t desired_word_size, + size_t* actual_size) { MutexLocker x(&_par_alloc_lock); - return allocate(size); + return allocate(min_word_size, desired_word_size, actual_size); } inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) { @@ -128,14 +148,23 @@ return pointer_delta(next, addr); } -inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) { +inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size) { assert(is_young(), "we can only skip BOT updates on young regions"); - return par_allocate_impl(word_size, end()); + return par_allocate_impl(min_word_size, desired_word_size, actual_word_size); } inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) { + size_t temp; + return allocate_no_bot_updates(word_size, word_size, &temp); +} + +inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t min_word_size, + size_t desired_word_size, + size_t* actual_word_size) { assert(is_young(), "we can only skip BOT updates on young regions"); - return allocate_impl(word_size, end()); + return allocate_impl(min_word_size, desired_word_size, actual_word_size); } inline void HeapRegion::note_start_of_marking() { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/heapRegionManager.cpp --- a/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -428,7 +428,7 @@ uncommit_regions(idx_last_found + num_last_found - to_remove, to_remove); - cur -= num_last_found; + cur = idx_last_found; removed += to_remove; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/g1/vmStructs_g1.hpp --- a/hotspot/src/share/vm/gc/g1/vmStructs_g1.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/g1/vmStructs_g1.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -34,7 +34,7 @@ static_field(HeapRegion, GrainBytes, size_t) \ static_field(HeapRegion, LogOfHRGrainBytes, int) \ \ - nonstatic_field(G1OffsetTableContigSpace, _top, HeapWord*) \ + nonstatic_field(G1OffsetTableContigSpace, _top, HeapWord* volatile) \ \ nonstatic_field(G1HeapRegionTable, _base, address) \ nonstatic_field(G1HeapRegionTable, _length, size_t) \ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/collectorPolicy.cpp --- a/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -225,6 +225,10 @@ return align_size_up(3 * _space_alignment, _gen_alignment); } +size_t GenCollectorPolicy::old_gen_size_lower_bound() { + return align_size_up(_space_alignment, _gen_alignment); +} + #ifdef ASSERT void GenCollectorPolicy::assert_flags() { CollectorPolicy::assert_flags(); @@ -284,7 +288,7 @@ // Make sure the heap is large enough for two generations size_t smallest_new_size = young_gen_size_lower_bound(); - size_t smallest_heap_size = align_size_up(smallest_new_size + align_size_up(_space_alignment, _gen_alignment), + size_t smallest_heap_size = align_size_up(smallest_new_size + old_gen_size_lower_bound(), _heap_alignment); if (MaxHeapSize < smallest_heap_size) { FLAG_SET_ERGO(size_t, MaxHeapSize, smallest_heap_size); @@ -356,6 +360,7 @@ vm_exit_during_initialization("Invalid young gen ratio specified"); } + OldSize = 
MAX2(OldSize, old_gen_size_lower_bound()); if (!is_size_aligned(OldSize, _gen_alignment)) { // Setting OldSize directly to preserve information about the possible // setting of OldSize on the command line. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/collectorPolicy.hpp --- a/hotspot/src/share/vm/gc/shared/collectorPolicy.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/collectorPolicy.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -282,6 +282,8 @@ size_t young_gen_size_lower_bound(); + size_t old_gen_size_lower_bound(); + HeapWord* mem_allocate_work(size_t size, bool is_tlab, bool* gc_overhead_limit_was_exceeded); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/gcHeapSummary.hpp --- a/hotspot/src/share/vm/gc/shared/gcHeapSummary.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/gcHeapSummary.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -189,4 +189,44 @@ }; +class G1EvacSummary : public StackObj { +private: + size_t _allocated; // Total allocated + size_t _wasted; // of which wasted (internal fragmentation) + size_t _undo_wasted; // of which wasted on undo (is not used for calculation of PLAB size) + size_t _unused; // Unused in last buffer + size_t _used; + + size_t _region_end_waste; // Number of words wasted due to skipping to the next region. + uint _regions_filled; // Number of regions filled completely. + size_t _direct_allocated; // Number of words allocated directly into the regions. + + // Number of words in live objects remaining in regions that ultimately suffered an + // evacuation failure. This is used in the regions when the regions are made old regions. + size_t _failure_used; + // Number of words wasted in regions which failed evacuation. This is the sum of space + // for objects successfully copied out of the regions (now dead space) plus waste at the + // end of regions. 
+ size_t _failure_waste; +public: + G1EvacSummary(size_t allocated, size_t wasted, size_t undo_wasted, size_t unused, + size_t used, size_t region_end_waste, uint regions_filled, size_t direct_allocated, + size_t failure_used, size_t failure_waste) : + _allocated(allocated), _wasted(wasted), _undo_wasted(undo_wasted), _unused(unused), + _used(used), _region_end_waste(region_end_waste), _regions_filled(regions_filled), + _direct_allocated(direct_allocated), _failure_used(failure_used), _failure_waste(failure_waste) + { } + + size_t allocated() const { return _allocated; } + size_t wasted() const { return _wasted; } + size_t undo_wasted() const { return _undo_wasted; } + size_t unused() const { return _unused; } + size_t used() const { return _used; } + size_t region_end_waste() const { return _region_end_waste; } + uint regions_filled() const { return _regions_filled; } + size_t direct_allocated() const { return _direct_allocated; } + size_t failure_used() const { return _failure_used; } + size_t failure_waste() const { return _failure_waste; } +}; + #endif // SHARE_VM_GC_SHARED_GCHEAPSUMMARY_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/gcTrace.cpp --- a/hotspot/src/share/vm/gc/shared/gcTrace.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/gcTrace.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -252,4 +252,12 @@ send_evacuation_failed_event(ef_info); ef_info.reset(); } + +void G1NewTracer::report_evacuation_statistics(const G1EvacSummary& young_summary, const G1EvacSummary& old_summary) const { + assert_set_gc_id(); + + send_young_evacuation_statistics(young_summary); + send_old_evacuation_statistics(old_summary); +} + #endif diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/gcTrace.hpp --- a/hotspot/src/share/vm/gc/shared/gcTrace.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/gcTrace.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -45,6 +45,7 @@ class MetaspaceSummary; class PSHeapSummary; class G1HeapSummary; +class G1EvacSummary; class ReferenceProcessorStats; class TimePartitions; class BoolObjectClosure; @@ -257,10 +258,14 @@ void report_evacuation_info(EvacuationInfo* info); void report_evacuation_failed(EvacuationFailedInfo& ef_info); + void report_evacuation_statistics(const G1EvacSummary& young_summary, const G1EvacSummary& old_summary) const; private: void send_g1_young_gc_event(); void send_evacuation_info_event(EvacuationInfo* info); void send_evacuation_failed_event(const EvacuationFailedInfo& ef_info) const; + + void send_young_evacuation_statistics(const G1EvacSummary& summary) const; + void send_old_evacuation_statistics(const G1EvacSummary& summary) const; }; #endif diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/gcTraceSend.cpp --- a/hotspot/src/share/vm/gc/shared/gcTraceSend.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/gcTraceSend.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -234,6 +234,37 @@ e.commit(); } } + +static TraceStructG1EvacStats create_g1_evacstats(unsigned gcid, const G1EvacSummary& summary) { + TraceStructG1EvacStats s; + s.set_gcId(gcid); + s.set_allocated(summary.allocated() * HeapWordSize); + s.set_wasted(summary.wasted() * HeapWordSize); + s.set_used(summary.used() * HeapWordSize); + s.set_undoWaste(summary.undo_wasted() * HeapWordSize); + s.set_regionEndWaste(summary.region_end_waste() * HeapWordSize); + s.set_regionsRefilled(summary.regions_filled()); + s.set_directAllocated(summary.direct_allocated() * HeapWordSize); + 
s.set_failureUsed(summary.failure_used() * HeapWordSize); + s.set_failureWaste(summary.failure_waste() * HeapWordSize); + return s; +} + +void G1NewTracer::send_young_evacuation_statistics(const G1EvacSummary& summary) const { + EventGCG1EvacuationYoungStatistics surv_evt; + if (surv_evt.should_commit()) { + surv_evt.set_stats(create_g1_evacstats(_shared_gc_info.gc_id().id(), summary)); + surv_evt.commit(); + } +} + +void G1NewTracer::send_old_evacuation_statistics(const G1EvacSummary& summary) const { + EventGCG1EvacuationOldStatistics old_evt; + if (old_evt.should_commit()) { + old_evt.set_stats(create_g1_evacstats(_shared_gc_info.gc_id().id(), summary)); + old_evt.commit(); + } +} #endif static TraceStructVirtualSpace to_trace_struct(const VirtualSpaceSummary& summary) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -86,7 +86,7 @@ { assert(policy != NULL, "Sanity check"); if (UseConcMarkSweepGC) { - _workers = new FlexibleWorkGang("GC Thread", ParallelGCThreads, + _workers = new WorkGang("GC Thread", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); _workers->initialize_workers(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -30,9 +30,9 @@ #include "gc/shared/collectorPolicy.hpp" #include "gc/shared/generation.hpp" -class FlexibleWorkGang; class StrongRootsScope; class SubTasksDone; +class WorkGang; // A "GenCollectedHeap" is a CollectedHeap that uses generational // collection. It has two generations, young and old. @@ -90,7 +90,7 @@ // In block contents verification, the number of header words to skip NOT_PRODUCT(static size_t _skip_header_HeapWords;) - FlexibleWorkGang* _workers; + WorkGang* _workers; protected: // Helper functions for allocation @@ -124,7 +124,7 @@ public: GenCollectedHeap(GenCollectorPolicy *policy); - FlexibleWorkGang* workers() const { return _workers; } + WorkGang* workers() const { return _workers; } GCStats* gc_stats(Generation* generation) const; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/plab.cpp --- a/hotspot/src/share/vm/gc/shared/plab.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/plab.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -24,7 +24,7 @@ #include "precompiled.hpp" #include "gc/shared/collectedHeap.hpp" -#include "gc/shared/plab.hpp" +#include "gc/shared/plab.inline.hpp" #include "gc/shared/threadLocalAllocBuffer.hpp" #include "oops/arrayOop.hpp" #include "oops/oop.inline.hpp" diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/plab.hpp --- a/hotspot/src/share/vm/gc/shared/plab.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/plab.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -27,7 +27,6 @@ #include "gc/shared/gcUtil.hpp" #include "memory/allocation.hpp" -#include "runtime/atomic.hpp" #include "utilities/globalDefinitions.hpp" // Forward declarations. @@ -75,6 +74,8 @@ PLAB(size_t word_sz); virtual ~PLAB() {} + static size_t size_required_for_allocation(size_t word_size) { return word_size + AlignmentReserve; } + // Minimum PLAB size. static size_t min_size(); // Maximum PLAB size. 
@@ -95,7 +96,7 @@ } // Allocate the object aligned to "alignment_in_bytes". - HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes); + inline HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes); // Undo any allocation in the buffer, which is required to be of the // "obj" of the given "word_sz". @@ -108,13 +109,6 @@ size_t waste() { return _wasted; } size_t undo_waste() { return _undo_wasted; } - // Should only be done if we are about to reset with a new buffer of the - // given size. - void set_word_size(size_t new_word_sz) { - assert(new_word_sz > AlignmentReserve, "Too small"); - _word_sz = new_word_sz; - } - // The number of words of unallocated space remaining in the buffer. size_t words_remaining() { assert(_end >= _top, "Negative buffer"); @@ -126,7 +120,10 @@ } // Sets the space of the buffer to be [buf, space+word_sz()). - virtual void set_buf(HeapWord* buf) { + virtual void set_buf(HeapWord* buf, size_t new_word_sz) { + assert(new_word_sz > AlignmentReserve, "Too small"); + _word_sz = new_word_sz; + _bottom = buf; _top = _bottom; _hard_end = _bottom + word_sz(); @@ -149,7 +146,8 @@ }; // PLAB book-keeping. -class PLABStats VALUE_OBJ_CLASS_SPEC { +class PLABStats : public CHeapObj { + protected: size_t _allocated; // Total allocated size_t _wasted; // of which wasted (internal fragmentation) size_t _undo_wasted; // of which wasted on undo (is not used for calculation of PLAB size) @@ -158,7 +156,7 @@ AdaptiveWeightedAverage _filter; // Integrator with decay - void reset() { + virtual void reset() { _allocated = 0; _wasted = 0; _undo_wasted = 0; @@ -174,6 +172,8 @@ _filter(wt) { } + virtual ~PLABStats() { } + static const size_t min_size() { return PLAB::min_size(); } @@ -187,23 +187,15 @@ // Updates the current desired PLAB size. Computes the new desired PLAB size with one gc worker thread, // updates _desired_plab_sz and clears sensor accumulators. 
- void adjust_desired_plab_sz(); + virtual void adjust_desired_plab_sz(); - void add_allocated(size_t v) { - Atomic::add_ptr(v, &_allocated); - } + inline void add_allocated(size_t v); - void add_unused(size_t v) { - Atomic::add_ptr(v, &_unused); - } + inline void add_unused(size_t v); - void add_wasted(size_t v) { - Atomic::add_ptr(v, &_wasted); - } + inline void add_wasted(size_t v); - void add_undo_wasted(size_t v) { - Atomic::add_ptr(v, &_undo_wasted); - } + inline void add_undo_wasted(size_t v); }; #endif // SHARE_VM_GC_SHARED_PLAB_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/plab.inline.hpp --- a/hotspot/src/share/vm/gc/shared/plab.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/plab.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -27,9 +27,10 @@ #include "gc/shared/collectedHeap.inline.hpp" #include "gc/shared/plab.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/atomic.inline.hpp" -HeapWord* PLAB::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) { - +inline HeapWord* PLAB::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) { HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes); if (res == NULL) { return NULL; @@ -41,4 +42,20 @@ return allocate(word_sz); } +void PLABStats::add_allocated(size_t v) { + Atomic::add_ptr(v, &_allocated); +} + +void PLABStats::add_unused(size_t v) { + Atomic::add_ptr(v, &_unused); +} + +void PLABStats::add_wasted(size_t v) { + Atomic::add_ptr(v, &_wasted); +} + +void PLABStats::add_undo_wasted(size_t v) { + Atomic::add_ptr(v, &_undo_wasted); +} + #endif // SHARE_VM_GC_SHARED_PLAB_INLINE_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/workgroup.cpp --- a/hotspot/src/share/vm/gc/shared/workgroup.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/workgroup.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -28,58 +28,25 @@ #include "memory/allocation.inline.hpp" #include "runtime/atomic.inline.hpp" #include "runtime/os.hpp" +#include "runtime/semaphore.hpp" +#include "runtime/thread.inline.hpp" // Definitions of WorkGang methods. -AbstractWorkGang::AbstractWorkGang(const char* name, - bool are_GC_task_threads, - bool are_ConcurrentGC_threads) : - _name(name), - _are_GC_task_threads(are_GC_task_threads), - _are_ConcurrentGC_threads(are_ConcurrentGC_threads) { - - assert(!(are_GC_task_threads && are_ConcurrentGC_threads), - "They cannot both be STW GC and Concurrent threads" ); - - // Other initialization. - _monitor = new Monitor(/* priority */ Mutex::leaf, - /* name */ "WorkGroup monitor", - /* allow_vm_block */ are_GC_task_threads, - Monitor::_safepoint_check_sometimes); - assert(monitor() != NULL, "Failed to allocate monitor"); - _task = NULL; - _sequence_number = 0; - _started_workers = 0; - _finished_workers = 0; -} - -WorkGang::WorkGang(const char* name, - uint workers, - bool are_GC_task_threads, - bool are_ConcurrentGC_threads) : - AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) { - _total_workers = workers; -} - -GangWorker* WorkGang::allocate_worker(uint which) { - GangWorker* new_worker = new GangWorker(this, which); - return new_worker; -} - // The current implementation will exit if the allocation // of any worker fails. Still, return a boolean so that // a future implementation can possibly do a partial // initialization of the workers and report such to the // caller. 
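Illustrative sketch (not part of this changeset): the add_allocated()/add_wasted() helpers moved into plab.inline.hpp are plain atomic accumulators shared by all GC worker threads; the equivalent in portable C++ with std::atomic:

#include <atomic>
#include <cstddef>

// Per-collection PLAB statistics, updated concurrently by many worker threads,
// so every accumulator is bumped with an atomic add.
struct PlabStatsSketch {
  std::atomic<size_t> allocated{0};
  std::atomic<size_t> wasted{0};
  std::atomic<size_t> undo_wasted{0};
  std::atomic<size_t> unused{0};

  void add_allocated(size_t v)   { allocated.fetch_add(v, std::memory_order_relaxed); }
  void add_wasted(size_t v)      { wasted.fetch_add(v, std::memory_order_relaxed); }
  void add_undo_wasted(size_t v) { undo_wasted.fetch_add(v, std::memory_order_relaxed); }
  void add_unused(size_t v)      { unused.fetch_add(v, std::memory_order_relaxed); }
};
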
-bool WorkGang::initialize_workers() { +bool AbstractWorkGang::initialize_workers() { if (TraceWorkGang) { tty->print_cr("Constructing work gang %s with %d threads", name(), total_workers()); } - _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers(), mtInternal); - if (gang_workers() == NULL) { + _workers = NEW_C_HEAP_ARRAY(AbstractGangWorker*, total_workers(), mtInternal); + if (_workers == NULL) { vm_exit_out_of_memory(0, OOM_MALLOC_ERROR, "Cannot create GangWorker array."); return false; } @@ -90,9 +57,9 @@ worker_type = os::pgc_thread; } for (uint worker = 0; worker < total_workers(); worker += 1) { - GangWorker* new_worker = allocate_worker(worker); + AbstractGangWorker* new_worker = allocate_worker(worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); - _gang_workers[worker] = new_worker; + _workers[worker] = new_worker; if (new_worker == NULL || !os::create_thread(new_worker, worker_type)) { vm_exit_out_of_memory(0, OOM_MALLOC_ERROR, "Cannot create worker GC thread. Out of system resources."); @@ -105,110 +72,208 @@ return true; } -GangWorker* AbstractWorkGang::gang_worker(uint i) const { +AbstractGangWorker* AbstractWorkGang::worker(uint i) const { // Array index bounds checking. - GangWorker* result = NULL; - assert(gang_workers() != NULL, "No workers for indexing"); + AbstractGangWorker* result = NULL; + assert(_workers != NULL, "No workers for indexing"); assert(i < total_workers(), "Worker index out of bounds"); - result = _gang_workers[i]; + result = _workers[i]; assert(result != NULL, "Indexing to null worker"); return result; } -void WorkGang::run_task(AbstractGangTask* task) { - run_task(task, total_workers()); -} - -void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) { - // This thread is executed by the VM thread which does not block - // on ordinary MutexLocker's. - MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); - if (TraceWorkGang) { - tty->print_cr("Running work gang %s task %s", name(), task->name()); - } - // Tell all the workers to run a task. - assert(task != NULL, "Running a null task"); - // Initialize. - _task = task; - _sequence_number += 1; - _started_workers = 0; - _finished_workers = 0; - // Tell the workers to get to work. - monitor()->notify_all(); - // Wait for them to be finished - while (finished_workers() < no_of_parallel_workers) { - if (TraceWorkGang) { - tty->print_cr("Waiting in work gang %s: %u/%u finished sequence %d", - name(), finished_workers(), no_of_parallel_workers, - _sequence_number); - } - monitor()->wait(/* no_safepoint_check */ true); - } - _task = NULL; - if (TraceWorkGang) { - tty->print_cr("\nFinished work gang %s: %u/%u sequence %d", - name(), finished_workers(), no_of_parallel_workers, - _sequence_number); - Thread* me = Thread::current(); - tty->print_cr(" T: " PTR_FORMAT " VM_thread: %d", p2i(me), me->is_VM_thread()); - } -} - -void FlexibleWorkGang::run_task(AbstractGangTask* task) { - // If active_workers() is passed, _finished_workers - // must only be incremented for workers that find non_null - // work (as opposed to all those that just check that the - // task is not null). 
- WorkGang::run_task(task, (uint) active_workers()); -} - -void AbstractWorkGang::internal_worker_poll(WorkData* data) const { - assert(monitor()->owned_by_self(), "worker_poll is an internal method"); - assert(data != NULL, "worker data is null"); - data->set_task(task()); - data->set_sequence_number(sequence_number()); -} - -void AbstractWorkGang::internal_note_start() { - assert(monitor()->owned_by_self(), "note_finish is an internal method"); - _started_workers += 1; -} - -void AbstractWorkGang::internal_note_finish() { - assert(monitor()->owned_by_self(), "note_finish is an internal method"); - _finished_workers += 1; -} - void AbstractWorkGang::print_worker_threads_on(outputStream* st) const { - uint num_thr = total_workers(); - for (uint i = 0; i < num_thr; i++) { - gang_worker(i)->print_on(st); + uint workers = total_workers(); + for (uint i = 0; i < workers; i++) { + worker(i)->print_on(st); st->cr(); } } void AbstractWorkGang::threads_do(ThreadClosure* tc) const { assert(tc != NULL, "Null ThreadClosure"); - uint num_thr = total_workers(); - for (uint i = 0; i < num_thr; i++) { - tc->do_thread(gang_worker(i)); + uint workers = total_workers(); + for (uint i = 0; i < workers; i++) { + tc->do_thread(worker(i)); } } -// GangWorker methods. +// WorkGang dispatcher implemented with semaphores. +// +// Semaphores don't require the worker threads to re-claim the lock when they wake up. +// This helps lowering the latency when starting and stopping the worker threads. +class SemaphoreGangTaskDispatcher : public GangTaskDispatcher { + // The task currently being dispatched to the GangWorkers. + AbstractGangTask* _task; + + volatile uint _started; + volatile uint _not_finished; + + // Semaphore used to start the GangWorkers. + Semaphore* _start_semaphore; + // Semaphore used to notify the coordinator that all workers are done. + Semaphore* _end_semaphore; + +public: + SemaphoreGangTaskDispatcher() : + _task(NULL), + _started(0), + _not_finished(0), + _start_semaphore(new Semaphore()), + _end_semaphore(new Semaphore()) +{ } + + ~SemaphoreGangTaskDispatcher() { + delete _start_semaphore; + delete _end_semaphore; + } + + void coordinator_execute_on_workers(AbstractGangTask* task, uint num_workers) { + // No workers are allowed to read the state variables until they have been signaled. + _task = task; + _not_finished = num_workers; + + // Dispatch 'num_workers' number of tasks. + _start_semaphore->signal(num_workers); + + // Wait for the last worker to signal the coordinator. + _end_semaphore->wait(); + + // No workers are allowed to read the state variables after the coordinator has been signaled. + assert(_not_finished == 0, err_msg("%d not finished workers?", _not_finished)); + _task = NULL; + _started = 0; + + } + + WorkData worker_wait_for_task() { + // Wait for the coordinator to dispatch a task. + _start_semaphore->wait(); + + uint num_started = (uint) Atomic::add(1, (volatile jint*)&_started); + + // Subtract one to get a zero-indexed worker id. + uint worker_id = num_started - 1; + + return WorkData(_task, worker_id); + } + + void worker_done_with_task() { + // Mark that the worker is done with the task. + // The worker is not allowed to read the state variables after this line. + uint not_finished = (uint) Atomic::add(-1, (volatile jint*)&_not_finished); + + // The last worker signals to the coordinator that all work is completed. 
+ if (not_finished == 0) { + _end_semaphore->signal(); + } + } +}; + +class MutexGangTaskDispatcher : public GangTaskDispatcher { + AbstractGangTask* _task; + + volatile uint _started; + volatile uint _finished; + volatile uint _num_workers; + + Monitor* _monitor; -GangWorker::GangWorker(AbstractWorkGang* gang, uint id) { + public: + MutexGangTaskDispatcher() + : _task(NULL), + _monitor(new Monitor(Monitor::leaf, "WorkGang dispatcher lock", false, Monitor::_safepoint_check_never)), + _started(0), + _finished(0), + _num_workers(0) {} + + ~MutexGangTaskDispatcher() { + delete _monitor; + } + + void coordinator_execute_on_workers(AbstractGangTask* task, uint num_workers) { + MutexLockerEx ml(_monitor, Mutex::_no_safepoint_check_flag); + + _task = task; + _num_workers = num_workers; + + // Tell the workers to get to work. + _monitor->notify_all(); + + // Wait for them to finish. + while (_finished < _num_workers) { + _monitor->wait(/* no_safepoint_check */ true); + } + + _task = NULL; + _num_workers = 0; + _started = 0; + _finished = 0; + } + + WorkData worker_wait_for_task() { + MonitorLockerEx ml(_monitor, Mutex::_no_safepoint_check_flag); + + while (_num_workers == 0 || _started == _num_workers) { + _monitor->wait(/* no_safepoint_check */ true); + } + + _started++; + + // Subtract one to get a zero-indexed worker id. + uint worker_id = _started - 1; + + return WorkData(_task, worker_id); + } + + void worker_done_with_task() { + MonitorLockerEx ml(_monitor, Mutex::_no_safepoint_check_flag); + + _finished++; + + if (_finished == _num_workers) { + // This will wake up all workers and not only the coordinator. + _monitor->notify_all(); + } + } +}; + +static GangTaskDispatcher* create_dispatcher() { + if (UseSemaphoreGCThreadsSynchronization) { + return new SemaphoreGangTaskDispatcher(); + } + + return new MutexGangTaskDispatcher(); +} + +WorkGang::WorkGang(const char* name, + uint workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + AbstractWorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads), + _dispatcher(create_dispatcher()) +{ } + +AbstractGangWorker* WorkGang::allocate_worker(uint worker_id) { + return new GangWorker(this, worker_id); +} + +void WorkGang::run_task(AbstractGangTask* task) { + _dispatcher->coordinator_execute_on_workers(task, active_workers()); +} + +AbstractGangWorker::AbstractGangWorker(AbstractWorkGang* gang, uint id) { _gang = gang; set_id(id); set_name("%s#%d", gang->name(), id); } -void GangWorker::run() { +void AbstractGangWorker::run() { initialize(); loop(); } -void GangWorker::initialize() { +void AbstractGangWorker::initialize() { this->initialize_thread_local_storage(); this->record_stack_base_and_size(); this->initialize_named_thread(); @@ -224,112 +289,59 @@ " of a work gang"); } -void GangWorker::loop() { - int previous_sequence_number = 0; - Monitor* gang_monitor = gang()->monitor(); - for ( ; ; ) { - WorkData data; - int part; // Initialized below. - { - // Grab the gang mutex. - MutexLocker ml(gang_monitor); - // Wait for something to do. - // Polling outside the while { wait } avoids missed notifies - // in the outer loop. 
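Illustrative sketch (not part of this changeset): the dispatch protocol of SemaphoreGangTaskDispatcher, modelled with C++20 std::counting_semaphore. The coordinator releases the start semaphore once per dispatched worker and blocks on the end semaphore; workers claim zero-based ids with an atomic counter, and the last worker to finish releases the end semaphore.

#include <atomic>
#include <semaphore>
#include <utility>

struct TaskLike {
  virtual void work(unsigned worker_id) = 0;
  virtual ~TaskLike() = default;
};

class SemaphoreDispatcherSketch {
  TaskLike* _task = nullptr;
  std::atomic<unsigned> _started{0};
  std::atomic<unsigned> _not_finished{0};
  std::counting_semaphore<1024> _start{0};   // released once per dispatched worker
  std::binary_semaphore _end{0};             // released by the last finishing worker

public:
  // Coordinator: hand 'task' to 'num_workers' workers and block until all are done.
  void coordinator_execute_on_workers(TaskLike* task, unsigned num_workers) {
    _task = task;
    _not_finished.store(num_workers);
    _start.release(num_workers);
    _end.acquire();
    _task = nullptr;
    _started.store(0);
  }

  // Worker: block until a task is dispatched, then claim a zero-based worker id.
  std::pair<TaskLike*, unsigned> worker_wait_for_task() {
    _start.acquire();
    unsigned id = _started.fetch_add(1);     // fetch_add returns the previous value
    return {_task, id};
  }

  // Worker: the last one to finish wakes the coordinator.
  void worker_done_with_task() {
    if (_not_finished.fetch_sub(1) == 1) {
      _end.release();
    }
  }
};
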
- gang()->internal_worker_poll(&data); - if (TraceWorkGang) { - tty->print("Polled outside for work in gang %s worker %u", - gang()->name(), id()); - tty->print(" sequence: %d (prev: %d)", - data.sequence_number(), previous_sequence_number); - if (data.task() != NULL) { - tty->print(" task: %s", data.task()->name()); - } else { - tty->print(" task: NULL"); - } - tty->cr(); - } - for ( ; /* break */; ) { - // Check for new work. - if ((data.task() != NULL) && - (data.sequence_number() != previous_sequence_number)) { - if (gang()->needs_more_workers()) { - gang()->internal_note_start(); - gang_monitor->notify_all(); - part = gang()->started_workers() - 1; - break; - } - } - // Nothing to do. - gang_monitor->wait(/* no_safepoint_check */ true); - gang()->internal_worker_poll(&data); - if (TraceWorkGang) { - tty->print("Polled inside for work in gang %s worker %u", - gang()->name(), id()); - tty->print(" sequence: %d (prev: %d)", - data.sequence_number(), previous_sequence_number); - if (data.task() != NULL) { - tty->print(" task: %s", data.task()->name()); - } else { - tty->print(" task: NULL"); - } - tty->cr(); - } - } - // Drop gang mutex. - } - if (TraceWorkGang) { - tty->print("Work for work gang %s id %u task %s part %d", - gang()->name(), id(), data.task()->name(), part); - } - assert(data.task() != NULL, "Got null task"); - data.task()->work(part); - { - if (TraceWorkGang) { - tty->print("Finish for work gang %s id %u task %s part %d", - gang()->name(), id(), data.task()->name(), part); - } - // Grab the gang mutex. - MutexLocker ml(gang_monitor); - gang()->internal_note_finish(); - // Tell the gang you are done. - gang_monitor->notify_all(); - // Drop the gang mutex. - } - previous_sequence_number = data.sequence_number(); - } -} - -bool GangWorker::is_GC_task_thread() const { +bool AbstractGangWorker::is_GC_task_thread() const { return gang()->are_GC_task_threads(); } -bool GangWorker::is_ConcurrentGC_thread() const { +bool AbstractGangWorker::is_ConcurrentGC_thread() const { return gang()->are_ConcurrentGC_threads(); } -void GangWorker::print_on(outputStream* st) const { +void AbstractGangWorker::print_on(outputStream* st) const { st->print("\"%s\" ", name()); Thread::print_on(st); st->cr(); } -// Printing methods +WorkData GangWorker::wait_for_task() { + return gang()->dispatcher()->worker_wait_for_task(); +} -const char* AbstractWorkGang::name() const { - return _name; +void GangWorker::signal_task_done() { + gang()->dispatcher()->worker_done_with_task(); +} + +void GangWorker::print_task_started(WorkData data) { + if (TraceWorkGang) { + tty->print_cr("Running work gang %s task %s worker %u", name(), data._task->name(), data._worker_id); + } } -#ifndef PRODUCT - -const char* AbstractGangTask::name() const { - return _name; +void GangWorker::print_task_done(WorkData data) { + if (TraceWorkGang) { + tty->print_cr("\nFinished work gang %s task %s worker %u", name(), data._task->name(), data._worker_id); + Thread* me = Thread::current(); + tty->print_cr(" T: " PTR_FORMAT " VM_thread: %d", p2i(me), me->is_VM_thread()); + } } -#endif /* PRODUCT */ +void GangWorker::run_task(WorkData data) { + print_task_started(data); + + data._task->work(data._worker_id); + + print_task_done(data); +} -// FlexibleWorkGang +void GangWorker::loop() { + while (true) { + WorkData data = wait_for_task(); + run_task(data); + + signal_task_done(); + } +} // *** WorkGangBarrierSync diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/gc/shared/workgroup.hpp --- 
a/hotspot/src/share/vm/gc/shared/workgroup.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -25,282 +25,111 @@ #ifndef SHARE_VM_GC_SHARED_WORKGROUP_HPP #define SHARE_VM_GC_SHARED_WORKGROUP_HPP -#include "gc/shared/taskqueue.hpp" -#include "runtime/thread.inline.hpp" +#include "memory/allocation.hpp" +#include "runtime/globals.hpp" +#include "runtime/thread.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" // Task class hierarchy: // AbstractGangTask -// AbstractGangTaskWOopQueues // // Gang/Group class hierarchy: // AbstractWorkGang // WorkGang -// FlexibleWorkGang -// YieldingFlexibleWorkGang (defined in another file) +// YieldingFlexibleWorkGang (defined in another file) // // Worker class hierarchy: -// GangWorker (subclass of WorkerThread) +// AbstractGangWorker (subclass of WorkerThread) +// GangWorker // YieldingFlexibleGangWorker (defined in another file) // Forward declarations of classes defined here +class AbstractGangWorker; +class Semaphore; class WorkGang; -class GangWorker; -class YieldingFlexibleGangWorker; -class YieldingFlexibleGangTask; -class WorkData; -class AbstractWorkGang; // An abstract task to be worked on by a gang. // You subclass this to supply your own work() method class AbstractGangTask VALUE_OBJ_CLASS_SPEC { -public: + const char* _name; + + public: + AbstractGangTask(const char* name) : _name(name) {} + // The abstract work method. // The argument tells you which member of the gang you are. virtual void work(uint worker_id) = 0; // Debugging accessor for the name. - const char* name() const PRODUCT_RETURN_(return NULL;); - int counter() { return _counter; } - void set_counter(int value) { _counter = value; } - int *address_of_counter() { return &_counter; } - - // RTTI - NOT_PRODUCT(virtual bool is_YieldingFlexibleGang_task() const { - return false; - }) + const char* name() const { return _name; } +}; -private: - NOT_PRODUCT(const char* _name;) - // ??? Should a task have a priority associated with it? - // ??? Or can the run method adjust priority as needed? - int _counter; - -protected: - // Constructor and desctructor: only construct subclasses. - AbstractGangTask(const char* name) - { - NOT_PRODUCT(_name = name); - _counter = 0; - } - ~AbstractGangTask() { } - -public: +struct WorkData { + AbstractGangTask* _task; + uint _worker_id; + WorkData(AbstractGangTask* task, uint worker_id) : _task(task), _worker_id(worker_id) {} }; -class AbstractGangTaskWOopQueues : public AbstractGangTask { - OopTaskQueueSet* _queues; - ParallelTaskTerminator _terminator; +// Interface to handle the synchronization between the coordinator thread and the worker threads, +// when a task is dispatched out to the worker threads. +class GangTaskDispatcher : public CHeapObj { public: - AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) : - AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {} - ParallelTaskTerminator* terminator() { return &_terminator; } - OopTaskQueueSet* queues() { return _queues; } + virtual ~GangTaskDispatcher() {} + + // Coordinator API. + + // Distributes the task out to num_workers workers. + // Returns when the task has been completed by all workers. + virtual void coordinator_execute_on_workers(AbstractGangTask* task, uint num_workers) = 0; + + // Worker API. + + // Waits for a task to become available to the worker. + // Returns when the worker has been assigned a task. 
+ virtual WorkData worker_wait_for_task() = 0; + + // Signal to the coordinator that the worker is done with the assigned task. + virtual void worker_done_with_task() = 0; }; +// The work gang is the collection of workers to execute tasks. +// The number of workers run for a task is "_active_workers" +// while "_total_workers" is the number of available of workers. +class AbstractWorkGang : public CHeapObj { + protected: + // The array of worker threads for this gang. + AbstractGangWorker** _workers; + // The count of the number of workers in the gang. + uint _total_workers; + // The currently active workers in this gang. + uint _active_workers; + // Printing support. + const char* _name; -// Class AbstractWorkGang: -// An abstract class representing a gang of workers. -// You subclass this to supply an implementation of run_task(). -class AbstractWorkGang: public CHeapObj { -protected: - // Work gangs are never deleted, so no need to cleanup. - ~AbstractWorkGang() { ShouldNotReachHere(); } -public: - // Constructor. - AbstractWorkGang(const char* name, bool are_GC_task_threads, - bool are_ConcurrentGC_threads); - // Run a task, returns when the task is done (or terminated). - virtual void run_task(AbstractGangTask* task) = 0; - // Return true if more workers should be applied to the task. - virtual bool needs_more_workers() const { return true; } -public: - // Debugging. - const char* name() const; -protected: + private: // Initialize only instance data. const bool _are_GC_task_threads; const bool _are_ConcurrentGC_threads; - // Printing support. - const char* _name; - // The monitor which protects these data, - // and notifies of changes in it. - Monitor* _monitor; - // The count of the number of workers in the gang. - uint _total_workers; - // The array of worker threads for this gang. - // This is only needed for cleaning up. - GangWorker** _gang_workers; - // The task for this gang. - AbstractGangTask* _task; - // A sequence number for the current task. - int _sequence_number; - // The number of started workers. - uint _started_workers; - // The number of finished workers. - uint _finished_workers; -public: - // Accessors for fields - Monitor* monitor() const { - return _monitor; - } - uint total_workers() const { - return _total_workers; - } - virtual uint active_workers() const { - return _total_workers; - } - GangWorker** gang_workers() const { - return _gang_workers; - } - AbstractGangTask* task() const { - return _task; - } - int sequence_number() const { - return _sequence_number; - } - uint started_workers() const { - return _started_workers; - } - uint finished_workers() const { - return _finished_workers; - } - bool are_GC_task_threads() const { - return _are_GC_task_threads; - } - bool are_ConcurrentGC_threads() const { - return _are_ConcurrentGC_threads; - } - // Predicates. - bool is_idle() const { - return (task() == NULL); - } - // Return the Ith gang worker. - GangWorker* gang_worker(uint i) const; - - void threads_do(ThreadClosure* tc) const; - - // Printing - void print_worker_threads_on(outputStream *st) const; - void print_worker_threads() const { - print_worker_threads_on(tty); - } - -protected: - friend class GangWorker; - friend class YieldingFlexibleGangWorker; - // Note activation and deactivation of workers. - // These methods should only be called with the mutex held. 
- void internal_worker_poll(WorkData* data) const; - void internal_note_start(); - void internal_note_finish(); -}; -class WorkData: public StackObj { - // This would be a struct, but I want accessor methods. -private: - AbstractGangTask* _task; - int _sequence_number; -public: - // Constructor and destructor - WorkData() { - _task = NULL; - _sequence_number = 0; - } - ~WorkData() { - } - AbstractGangTask* task() const { return _task; } - void set_task(AbstractGangTask* value) { _task = value; } - int sequence_number() const { return _sequence_number; } - void set_sequence_number(int value) { _sequence_number = value; } - - YieldingFlexibleGangTask* yf_task() const { - return (YieldingFlexibleGangTask*)_task; - } -}; - -// Class WorkGang: -class WorkGang: public AbstractWorkGang { -public: - // Constructor - WorkGang(const char* name, uint workers, - bool are_GC_task_threads, bool are_ConcurrentGC_threads); - // Run a task, returns when the task is done (or terminated). - virtual void run_task(AbstractGangTask* task); - void run_task(AbstractGangTask* task, uint no_of_parallel_workers); - // Allocate a worker and return a pointer to it. - virtual GangWorker* allocate_worker(uint which); - // Initialize workers in the gang. Return true if initialization - // succeeded. The type of the worker can be overridden in a derived - // class with the appropriate implementation of allocate_worker(). - bool initialize_workers(); -}; - -// Class GangWorker: -// Several instances of this class run in parallel as workers for a gang. -class GangWorker: public WorkerThread { -public: - // Constructors and destructor. - GangWorker(AbstractWorkGang* gang, uint id); + public: + AbstractWorkGang(const char* name, uint workers, bool are_GC_task_threads, bool are_ConcurrentGC_threads) : + _name(name), + _total_workers(workers), + _active_workers(UseDynamicNumberOfGCThreads ? 1U : workers), + _are_GC_task_threads(are_GC_task_threads), + _are_ConcurrentGC_threads(are_ConcurrentGC_threads) + { } - // The only real method: run a task for the gang. - virtual void run(); - // Predicate for Thread - virtual bool is_GC_task_thread() const; - virtual bool is_ConcurrentGC_thread() const; - // Printing - void print_on(outputStream* st) const; - virtual void print() const { print_on(tty); } -protected: - AbstractWorkGang* _gang; - - virtual void initialize(); - virtual void loop(); - -public: - AbstractWorkGang* gang() const { return _gang; } -}; + // Initialize workers in the gang. Return true if initialization succeeded. + bool initialize_workers(); -// Dynamic number of worker threads -// -// This type of work gang is used to run different numbers of -// worker threads at different times. The -// number of workers run for a task is "_active_workers" -// instead of "_total_workers" in a WorkGang. The method -// "needs_more_workers()" returns true until "_active_workers" -// have been started and returns false afterwards. The -// implementation of "needs_more_workers()" in WorkGang always -// returns true so that all workers are started. The method -// "loop()" in GangWorker was modified to ask "needs_more_workers()" -// in its loop to decide if it should start working on a task. -// A worker in "loop()" waits for notification on the WorkGang -// monitor and execution of each worker as it checks for work -// is serialized via the same monitor. 
The "needs_more_workers()" -// call is serialized and additionally the calculation for the -// "part" (effectively the worker id for executing the task) is -// serialized to give each worker a unique "part". Workers that -// are not needed for this tasks (i.e., "_active_workers" have -// been started before it, continue to wait for work. + bool are_GC_task_threads() const { return _are_GC_task_threads; } + bool are_ConcurrentGC_threads() const { return _are_ConcurrentGC_threads; } -class FlexibleWorkGang: public WorkGang { - // The currently active workers in this gang. - // This is a number that is dynamically adjusted - // and checked in the run_task() method at each invocation. - // As described above _active_workers determines the number - // of threads started on a task. It must also be used to - // determine completion. + uint total_workers() const { return _total_workers; } - protected: - uint _active_workers; - public: - // Constructor and destructor. - FlexibleWorkGang(const char* name, uint workers, - bool are_GC_task_threads, - bool are_ConcurrentGC_threads) : - WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads), - _active_workers(UseDynamicNumberOfGCThreads ? 1U : workers) {} - - // Accessors for fields. virtual uint active_workers() const { assert(_active_workers <= _total_workers, err_msg("_active_workers: %u > _total_workers: %u", _active_workers, _total_workers)); @@ -317,10 +146,90 @@ assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers, "Unless dynamic should use total workers"); } + + // Return the Ith worker. + AbstractGangWorker* worker(uint i) const; + + void threads_do(ThreadClosure* tc) const; + + // Debugging. + const char* name() const { return _name; } + + // Printing + void print_worker_threads_on(outputStream *st) const; + void print_worker_threads() const { + print_worker_threads_on(tty); + } + + protected: + virtual AbstractGangWorker* allocate_worker(uint which) = 0; +}; + +// An class representing a gang of workers. +class WorkGang: public AbstractWorkGang { + // To get access to the GangTaskDispatcher instance. + friend class GangWorker; + + // Never deleted. + ~WorkGang(); + + GangTaskDispatcher* const _dispatcher; + GangTaskDispatcher* dispatcher() const { + return _dispatcher; + } + +public: + WorkGang(const char* name, + uint workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads); + + // Run a task, returns when the task is done. virtual void run_task(AbstractGangTask* task); - virtual bool needs_more_workers() const { - return _started_workers < _active_workers; - } + +protected: + virtual AbstractGangWorker* allocate_worker(uint which); +}; + +// Several instances of this class run in parallel as workers for a gang. +class AbstractGangWorker: public WorkerThread { +public: + AbstractGangWorker(AbstractWorkGang* gang, uint id); + + // The only real method: run a task for the gang. 
+ virtual void run(); + // Predicate for Thread + virtual bool is_GC_task_thread() const; + virtual bool is_ConcurrentGC_thread() const; + // Printing + void print_on(outputStream* st) const; + virtual void print() const { print_on(tty); } + +protected: + AbstractWorkGang* _gang; + + virtual void initialize(); + virtual void loop() = 0; + + AbstractWorkGang* gang() const { return _gang; } +}; + +class GangWorker: public AbstractGangWorker { +public: + GangWorker(WorkGang* gang, uint id) : AbstractGangWorker(gang, id) {} + +protected: + virtual void loop(); + +private: + WorkData wait_for_task(); + void run_task(WorkData work); + void signal_task_done(); + + void print_task_started(WorkData data); + void print_task_done(WorkData data); + + WorkGang* gang() const { return (WorkGang*)_gang; } }; // A class that acts as a synchronisation barrier. Workers enter diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/interpreter/abstractInterpreter.hpp --- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -90,6 +90,10 @@ java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update() java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes() java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() + java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat() + java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() + java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() + java_lang_Double_doubleToRawLongBits, // implementation of java.lang.Double.doubleToRawLongBits() number_of_method_entries, invalid = -1 }; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/interpreter/interpreter.cpp --- a/hotspot/src/share/vm/interpreter/interpreter.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/interpreter/interpreter.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -234,7 +234,15 @@ case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer; } } -#endif + + switch(m->intrinsic_id()) { + case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat; + case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits; + case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble; + case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits; + } + +#endif // CC_INTERP // Native method? // Note: This test must come _before_ the test for intrinsic @@ -559,6 +567,25 @@ : // fall thru case Interpreter::java_util_zip_CRC32_updateByteBuffer : entry_point = generate_CRC32_updateBytes_entry(kind); break; +#if defined(TARGET_ARCH_x86) && !defined(_LP64) + // On x86_32 platforms, a special entry is generated for the following four methods. + // On other platforms the normal entry is used to enter these methods. 
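The four new method entries added above (Float.intBitsToFloat, Float.floatToRawIntBits, Double.longBitsToDouble, Double.doubleToRawLongBits) are pure bit reinterpretations, which is why a trivial special entry can stand in for the normal native call. As a rough illustration of the semantics only (standalone C++, not interpreter code):

    #include <cstdint>
    #include <cstring>

    // "Raw" bit reinterpretation between int/float and long/double; memcpy is
    // the portable way to express this in C++.
    static float int_bits_to_float(std::int32_t bits) {
      float f;
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }

    static std::int64_t double_to_raw_long_bits(double d) {
      std::int64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      return bits;
    }

The "raw" variants do not canonicalize NaN patterns, so the reinterpretation is exact in both directions.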
+ case Interpreter::java_lang_Float_intBitsToFloat + : entry_point = generate_Float_intBitsToFloat_entry(); break; + case Interpreter::java_lang_Float_floatToRawIntBits + : entry_point = generate_Float_floatToRawIntBits_entry(); break; + case Interpreter::java_lang_Double_longBitsToDouble + : entry_point = generate_Double_longBitsToDouble_entry(); break; + case Interpreter::java_lang_Double_doubleToRawLongBits + : entry_point = generate_Double_doubleToRawLongBits_entry(); break; +#else + case Interpreter::java_lang_Float_intBitsToFloat: + case Interpreter::java_lang_Float_floatToRawIntBits: + case Interpreter::java_lang_Double_longBitsToDouble: + case Interpreter::java_lang_Double_doubleToRawLongBits: + entry_point = generate_native_entry(false); + break; +#endif // defined(TARGET_ARCH_x86) && !defined(_LP64) #endif // CC_INTERP default: fatal(err_msg("unexpected method kind: %d", kind)); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/interpreter/templateInterpreter.cpp --- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -397,34 +397,39 @@ // all non-native method kinds method_entry(zerolocals) - method_entry(zerolocals_synchronized) - method_entry(empty) - method_entry(accessor) - method_entry(abstract) - method_entry(java_lang_math_sin ) - method_entry(java_lang_math_cos ) - method_entry(java_lang_math_tan ) - method_entry(java_lang_math_abs ) - method_entry(java_lang_math_sqrt ) - method_entry(java_lang_math_log ) - method_entry(java_lang_math_log10) - method_entry(java_lang_math_exp ) - method_entry(java_lang_math_pow ) - method_entry(java_lang_ref_reference_get) + method_entry(zerolocals_synchronized) + method_entry(empty) + method_entry(accessor) + method_entry(abstract) + method_entry(java_lang_math_sin ) + method_entry(java_lang_math_cos ) + method_entry(java_lang_math_tan ) + method_entry(java_lang_math_abs ) + method_entry(java_lang_math_sqrt ) + method_entry(java_lang_math_log ) + method_entry(java_lang_math_log10) + method_entry(java_lang_math_exp ) + method_entry(java_lang_math_pow ) + method_entry(java_lang_ref_reference_get) - if (UseCRC32Intrinsics) { - method_entry(java_util_zip_CRC32_update) - method_entry(java_util_zip_CRC32_updateBytes) - method_entry(java_util_zip_CRC32_updateByteBuffer) - } + if (UseCRC32Intrinsics) { + method_entry(java_util_zip_CRC32_update) + method_entry(java_util_zip_CRC32_updateBytes) + method_entry(java_util_zip_CRC32_updateByteBuffer) + } + + method_entry(java_lang_Float_intBitsToFloat); + method_entry(java_lang_Float_floatToRawIntBits); + method_entry(java_lang_Double_longBitsToDouble); + method_entry(java_lang_Double_doubleToRawLongBits); initialize_method_handle_entries(); // all native method kinds (must be one contiguous block) Interpreter::_native_entry_begin = Interpreter::code()->code_end(); method_entry(native) - method_entry(native_synchronized) - Interpreter::_native_entry_end = Interpreter::code()->code_end(); + method_entry(native_synchronized) + Interpreter::_native_entry_end = Interpreter::code()->code_end(); #undef method_entry diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/memory/metaspace.hpp --- a/hotspot/src/share/vm/memory/metaspace.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/memory/metaspace.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -254,7 +254,7 @@ // Debugging support void verify(); - static void print_compressed_class_space(outputStream* st, const 
char* requested_addr = 0); + static void print_compressed_class_space(outputStream* st, const char* requested_addr = 0) NOT_LP64({}); class AllocRecordClosure : public StackObj { public: diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/memory/universe.cpp --- a/hotspot/src/share/vm/memory/universe.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/memory/universe.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -77,7 +77,7 @@ #if INCLUDE_ALL_GCS #include "gc/cms/cmsCollectorPolicy.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" -#include "gc/g1/g1CollectorPolicy_ext.hpp" +#include "gc/g1/g1CollectorPolicy.hpp" #include "gc/parallel/parallelScavengeHeap.hpp" #include "gc/shared/adaptiveSizePolicy.hpp" #endif // INCLUDE_ALL_GCS @@ -694,13 +694,29 @@ return JNI_OK; } -template -jint Universe::create_heap() { +CollectedHeap* Universe::create_heap() { assert(_collectedHeap == NULL, "Heap already created"); - Policy* policy = new Policy(); - policy->initialize_all(); - _collectedHeap = new Heap(policy); - return _collectedHeap->initialize(); +#if !INCLUDE_ALL_GCS + if (UseParallelGC) { + fatal("UseParallelGC not supported in this VM."); + } else if (UseG1GC) { + fatal("UseG1GC not supported in this VM."); + } else if (UseConcMarkSweepGC) { + fatal("UseConcMarkSweepGC not supported in this VM."); +#else + if (UseParallelGC) { + return Universe::create_heap_with_policy(); + } else if (UseG1GC) { + return Universe::create_heap_with_policy(); + } else if (UseConcMarkSweepGC) { + return Universe::create_heap_with_policy(); +#endif + } else if (UseSerialGC) { + return Universe::create_heap_with_policy(); + } + + ShouldNotReachHere(); + return NULL; } // Choose the heap base address and oop encoding mode @@ -714,27 +730,12 @@ jint Universe::initialize_heap() { jint status = JNI_ERR; -#if !INCLUDE_ALL_GCS - if (UseParallelGC) { - fatal("UseParallelGC not supported in this VM."); - } else if (UseG1GC) { - fatal("UseG1GC not supported in this VM."); - } else if (UseConcMarkSweepGC) { - fatal("UseConcMarkSweepGC not supported in this VM."); -#else - if (UseParallelGC) { - status = Universe::create_heap(); - } else if (UseG1GC) { - status = Universe::create_heap(); - } else if (UseConcMarkSweepGC) { - status = Universe::create_heap(); -#endif - } else if (UseSerialGC) { - status = Universe::create_heap(); - } else { - ShouldNotReachHere(); + _collectedHeap = create_heap_ext(); + if (_collectedHeap == NULL) { + _collectedHeap = create_heap(); } + status = _collectedHeap->initialize(); if (status != JNI_OK) { return status; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/memory/universe.hpp --- a/hotspot/src/share/vm/memory/universe.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/memory/universe.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -214,7 +214,9 @@ static size_t _heap_capacity_at_last_gc; static size_t _heap_used_at_last_gc; - template static jint create_heap(); + template static CollectedHeap* create_heap_with_policy(); + static CollectedHeap* create_heap(); + static CollectedHeap* create_heap_ext(); static jint initialize_heap(); static void initialize_basic_type_mirrors(TRAPS); static void fixup_mirrors(TRAPS); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/memory/universe.inline.hpp --- a/hotspot/src/share/vm/memory/universe.inline.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/memory/universe.inline.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -49,4 +49,11 @@ _allocation_context_notification_obj = obj; } +template +CollectedHeap* 
Universe::create_heap_with_policy() { + Policy* policy = new Policy(); + policy->initialize_all(); + return new Heap(policy); +} + #endif // SHARE_VM_MEMORY_UNIVERSE_INLINE_HPP diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/memory/universe_ext.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/memory/universe_ext.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "memory/universe.hpp" + +CollectedHeap* Universe::create_heap_ext() { + return NULL; +} diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/oops/symbol.cpp --- a/hotspot/src/share/vm/oops/symbol.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/oops/symbol.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -35,7 +35,7 @@ Symbol::Symbol(const u1* name, int length, int refcount) { _refcount = refcount; _length = length; - _identity_hash = os::random(); + _identity_hash = (short)os::random(); for (int i = 0; i < _length; i++) { byte_at_put(i, name[i]); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/oops/symbol.hpp --- a/hotspot/src/share/vm/oops/symbol.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/oops/symbol.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -106,23 +106,18 @@ #define PERM_REFCOUNT -1 #endif -// We separate the fields in SymbolBase from Symbol::_body so that -// Symbol::size(int) can correctly calculate the space needed. -class SymbolBase : public MetaspaceObj { - public: +class Symbol : public MetaspaceObj { + friend class VMStructs; + friend class SymbolTable; + friend class MoveSymbols; + + private: ATOMIC_SHORT_PAIR( volatile short _refcount, // needs atomic operation unsigned short _length // number of UTF8 characters in the symbol (does not need atomic op) ); - int _identity_hash; -}; - -class Symbol : private SymbolBase { - friend class VMStructs; - friend class SymbolTable; - friend class MoveSymbols; - private: - jbyte _body[1]; + short _identity_hash; + jbyte _body[2]; enum { // max_symbol_length is constrained by type of _length @@ -130,7 +125,7 @@ }; static int size(int length) { - size_t sz = heap_word_size(sizeof(SymbolBase) + (length > 0 ? length : 0)); + size_t sz = heap_word_size(sizeof(Symbol) + (length > 2 ? length - 2 : 0)); return align_object_size(sz); } @@ -154,8 +149,11 @@ // Returns the largest size symbol we can safely hold. 
static int max_length() { return max_symbol_length; } - - int identity_hash() { return _identity_hash; } + unsigned identity_hash() { + unsigned addr_bits = (unsigned)((uintptr_t)this >> (LogMinObjAlignmentInBytes + 3)); + return ((unsigned)_identity_hash & 0xffff) | + ((addr_bits ^ (_length << 8) ^ (( _body[0] << 8) | _body[1])) << 16); + } // For symbol table alternate hashing unsigned int new_hash(juint seed); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/arraycopynode.cpp --- a/hotspot/src/share/vm/opto/arraycopynode.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/arraycopynode.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -626,3 +626,75 @@ return CallNode::may_modify_arraycopy_helper(dest_t, t_oop, phase); } + +bool ArrayCopyNode::may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase) { + if (n->is_Proj()) { + n = n->in(0); + if (n->is_Call() && n->as_Call()->may_modify(t_oop, phase)) { + return true; + } + } + return false; +} + +bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase) { + Node* mem = mb->in(TypeFunc::Memory); + + if (mem->is_MergeMem()) { + Node* n = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); + if (may_modify_helper(t_oop, n, phase)) { + return true; + } else if (n->is_Phi()) { + for (uint i = 1; i < n->req(); i++) { + if (n->in(i) != NULL) { + if (may_modify_helper(t_oop, n->in(i), phase)) { + return true; + } + } + } + } + } + + return false; +} + +// Does this array copy modify offsets between offset_lo and offset_hi +// in the destination array +// if must_modify is false, return true if the copy could write +// between offset_lo and offset_hi +// if must_modify is true, return true if the copy is guaranteed to +// write between offset_lo and offset_hi +bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify) { + assert(_kind == ArrayCopy || _kind == CopyOf || _kind == CopyOfRange, "only for real array copies"); + + Node* dest = in(ArrayCopyNode::Dest); + Node* src_pos = in(ArrayCopyNode::SrcPos); + Node* dest_pos = in(ArrayCopyNode::DestPos); + Node* len = in(ArrayCopyNode::Length); + + const TypeInt *dest_pos_t = phase->type(dest_pos)->isa_int(); + const TypeInt *len_t = phase->type(len)->isa_int(); + const TypeAryPtr* ary_t = phase->type(dest)->isa_aryptr(); + + if (dest_pos_t != NULL && len_t != NULL && ary_t != NULL) { + BasicType ary_elem = ary_t->klass()->as_array_klass()->element_type()->basic_type(); + uint header = arrayOopDesc::base_offset_in_bytes(ary_elem); + uint elemsize = type2aelembytes(ary_elem); + + intptr_t dest_pos_plus_len_lo = (((intptr_t)dest_pos_t->_lo) + len_t->_lo) * elemsize + header; + intptr_t dest_pos_plus_len_hi = (((intptr_t)dest_pos_t->_hi) + len_t->_hi) * elemsize + header; + intptr_t dest_pos_lo = ((intptr_t)dest_pos_t->_lo) * elemsize + header; + intptr_t dest_pos_hi = ((intptr_t)dest_pos_t->_hi) * elemsize + header; + + if (must_modify) { + if (offset_lo >= dest_pos_hi && offset_hi < dest_pos_plus_len_lo) { + return true; + } + } else { + if (offset_hi >= dest_pos_lo && offset_lo < dest_pos_plus_len_hi) { + return true; + } + } + } + return false; +} diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/arraycopynode.hpp --- a/hotspot/src/share/vm/opto/arraycopynode.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/arraycopynode.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -108,6 +108,7 @@ BasicType copy_type, const Type* value_type, int count); bool 
finish_transform(PhaseGVN *phase, bool can_reshape, Node* ctl, Node *mem); + static bool may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase); public: @@ -162,6 +163,9 @@ bool is_alloc_tightly_coupled() const { return _alloc_tightly_coupled; } + static bool may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase); + bool modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify); + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; virtual void dump_compact_spec(outputStream* st) const; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/c2compiler.cpp --- a/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -161,7 +161,7 @@ vmIntrinsics::ID id = method->intrinsic_id(); assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); - if (id < vmIntrinsics::FIRST_ID || id >= vmIntrinsics::LAST_COMPILER_INLINE) { + if (id < vmIntrinsics::FIRST_ID || id > vmIntrinsics::LAST_COMPILER_INLINE) { return false; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/callnode.cpp --- a/hotspot/src/share/vm/opto/callnode.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/callnode.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -742,7 +742,7 @@ // bool CallNode::may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase) { assert((t_oop != NULL), "sanity"); - if (is_call_to_arraycopystub()) { + if (is_call_to_arraycopystub() && strcmp(_name, "unsafe_arraycopy") != 0) { const TypeTuple* args = _tf->domain(); Node* dest = NULL; // Stubs that can be called once an ArrayCopyNode is expanded have diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/chaitin.cpp --- a/hotspot/src/share/vm/opto/chaitin.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/chaitin.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -990,9 +990,13 @@ // FOUR registers! 
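The new ArrayCopyNode::modifies() above is essentially an interval test against the byte range the copy writes in the destination. A standalone model of that test follows; CopyBounds and its field names are invented, and the four bounds correspond to dest_pos_lo, dest_pos_hi, dest_pos_plus_len_lo and dest_pos_plus_len_hi in the patch:

    #include <cstdint>

    struct CopyBounds {
      std::intptr_t write_lo_min;   // dest_pos_lo:  start of the possibly-written region
      std::intptr_t write_lo_max;   // dest_pos_hi:  start of the certainly-written region
      std::intptr_t write_hi_min;   // dest_pos_plus_len_lo: end of the certainly-written region
      std::intptr_t write_hi_max;   // dest_pos_plus_len_hi: end of the possibly-written region
    };

    // must_modify == true : is [offset_lo, offset_hi] guaranteed to be overwritten?
    // must_modify == false: could the copy touch some byte in [offset_lo, offset_hi]?
    static bool modifies(std::intptr_t offset_lo, std::intptr_t offset_hi,
                         const CopyBounds& b, bool must_modify) {
      if (must_modify) {
        return offset_lo >= b.write_lo_max && offset_hi < b.write_hi_min;
      }
      return offset_hi >= b.write_lo_min && offset_lo < b.write_hi_max;
    }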
#ifdef ASSERT if (is_vect) { - assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned"); - assert(!lrg._fat_proj, "sanity"); - assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity"); + if (lrg.num_regs() != 0) { + assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned"); + assert(!lrg._fat_proj, "sanity"); + assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity"); + } else { + assert(n->is_Phi(), "not all inputs processed only if Phi"); + } } #endif if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/compile.hpp --- a/hotspot/src/share/vm/opto/compile.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/compile.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -93,7 +93,7 @@ public: void set_idx(node_idx_t idx) { - _idx_clone_orig = _idx_clone_orig & 0xFFFFFFFF00000000 | idx; + _idx_clone_orig = _idx_clone_orig & CONST64(0xFFFFFFFF00000000) | idx; } node_idx_t idx() const { return (node_idx_t)(_idx_clone_orig & 0xFFFFFFFF); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/library_call.cpp --- a/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -2730,7 +2730,22 @@ load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval)); } } - post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); + if (kind == LS_cmpxchg) { + // Emit the post barrier only when the actual store happened. + // This makes sense to check only for compareAndSet that can fail to set the value. + // CAS success path is marked more likely since we anticipate this is a performance + // critical path, while CAS failure path can use the penalty for going through unlikely + // path as backoff. Which is still better than doing a store barrier there. + IdealKit ideal(this); + ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); { + sync_kit(ideal); + post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); + ideal.sync_kit(this); + } ideal.end_if(); + final_sync(ideal); + } else { + post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); + } break; default: fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/loopnode.cpp --- a/hotspot/src/share/vm/opto/loopnode.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/loopnode.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -1175,7 +1175,7 @@ //============================================================================= //------------------------------is_member-------------------------------------- // Is 'l' a member of 'this'? -int IdealLoopTree::is_member( const IdealLoopTree *l ) const { +bool IdealLoopTree::is_member(const IdealLoopTree *l) const { while( l->_nest > _nest ) l = l->_parent; return l == this; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/loopnode.hpp --- a/hotspot/src/share/vm/opto/loopnode.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/loopnode.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -384,7 +384,7 @@ { } // Is 'l' a member of 'this'? - int is_member( const IdealLoopTree *l ) const; // Test for nested membership + bool is_member(const IdealLoopTree *l) const; // Test for nested membership // Set loop nesting depth. Accumulate has_call bits. 
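Regarding the library_call.cpp change above (post barrier emitted only when the compareAndSwap actually stored): a standalone sketch of the shape the intrinsic now produces, with cas_oop_with_barrier and post_barrier invented as placeholders:

    #include <atomic>

    static void post_barrier(void** field, void* new_value) {   // placeholder for the
      (void)field; (void)new_value;                              // card mark / G1 barrier
    }

    static bool cas_oop_with_barrier(std::atomic<void*>& field,
                                     void* expected, void* new_value) {
      bool ok = field.compare_exchange_strong(expected, new_value);
      if (ok) {                       // success path, assumed to be the likely one
        // Only a successful CAS publishes a new reference, so only then is the
        // post barrier needed; the failure path skips the store barrier entirely.
        post_barrier(reinterpret_cast<void**>(&field), new_value);
      }
      return ok;
    }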
int set_nest( uint depth ); @@ -1086,6 +1086,8 @@ bool split_up( Node *n, Node *blk1, Node *blk2 ); void sink_use( Node *use, Node *post_loop ); Node *place_near_use( Node *useblock ) const; + Node* try_move_store_before_loop(Node* n, Node *n_ctrl); + void try_move_store_after_loop(Node* n); bool _created_loop_node; public: diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/loopopts.cpp --- a/hotspot/src/share/vm/opto/loopopts.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/loopopts.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -653,6 +653,209 @@ return iff->in(1); } +#ifdef ASSERT +static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) { + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* u = m->fast_out(i); + if (u->is_CFG()) { + if (u->Opcode() == Op_NeverBranch) { + u = ((NeverBranchNode*)u)->proj_out(0); + enqueue_cfg_uses(u, wq); + } else { + wq.push(u); + } + } + } +} +#endif + +// Try moving a store out of a loop, right before the loop +Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) { + // Store has to be first in the loop body + IdealLoopTree *n_loop = get_loop(n_ctrl); + if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop()) { + assert(n->in(0), "store should have control set"); + Node* address = n->in(MemNode::Address); + Node* value = n->in(MemNode::ValueIn); + Node* mem = n->in(MemNode::Memory); + IdealLoopTree* address_loop = get_loop(get_ctrl(address)); + IdealLoopTree* value_loop = get_loop(get_ctrl(value)); + + // - address and value must be loop invariant + // - memory must be a memory Phi for the loop + // - Store must be the only store on this memory slice in the + // loop: if there's another store following this one then value + // written at iteration i by the second store could be overwritten + // at iteration i+n by the first store: it's not safe to move the + // first store out of the loop + // - nothing must observe the Phi memory: it guarantees no read + // before the store and no early exit out of the loop + // With those conditions, we are also guaranteed the store post + // dominates the loop head. Otherwise there would be extra Phi + // involved between the loop's Phi and the store. + + if (!n_loop->is_member(address_loop) && + !n_loop->is_member(value_loop) && + mem->is_Phi() && mem->in(0) == n_loop->_head && + mem->outcnt() == 1 && + mem->in(LoopNode::LoopBackControl) == n) { + +#ifdef ASSERT + // Verify that store's control does post dominate loop entry and + // that there's no early exit of the loop before the store. + bool ctrl_ok = false; + { + // Follow control from loop head until n, we exit the loop or + // we reach the tail + ResourceMark rm; + Unique_Node_List wq; + wq.push(n_loop->_head); + assert(n_loop->_tail != NULL, "need a tail"); + for (uint next = 0; next < wq.size(); ++next) { + Node *m = wq.at(next); + if (m == n->in(0)) { + ctrl_ok = true; + continue; + } + assert(!has_ctrl(m), "should be CFG"); + if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) { + ctrl_ok = false; + break; + } + enqueue_cfg_uses(m, wq); + } + } + assert(ctrl_ok, "bad control"); +#endif + + // move the Store + _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem); + _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl)); + _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl)); + // Disconnect the phi now. An empty phi can confuse other + // optimizations in this pass of loop opts. 
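try_move_store_before_loop above hoists a loop-invariant store in front of the loop when its address and value are loop invariant and nothing in the loop observes the memory state (a sinking counterpart, try_move_store_after_loop, follows below). At the source level the hoisting case corresponds roughly to the following; Box, field and the explicit guard are invented, and in the C2 graph it is the placement of the moved store rather than a guard that ensures it only executes when the loop body would have run:

    // Source-level illustration only.
    struct Box { int field; };

    static void original(Box* b, int x, int n) {
      for (int i = 0; i < n; i++) {
        b->field = x;     // address and value are loop invariant; nothing in the
      }                   // loop reads the field, so the store can be hoisted
    }

    static void transformed(Box* b, int x, int n) {
      if (n > 0) {        // stands in for "the loop body executes at least once"
        b->field = x;
      }
      for (int i = 0; i < n; i++) {
        // loop body now free of the store
      }
    }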
+ _igvn.replace_node(mem, mem->in(LoopNode::EntryControl)); + n_loop->_body.yank(mem); + + IdealLoopTree* new_loop = get_loop(n->in(0)); + set_ctrl_and_loop(n, n->in(0)); + + return n; + } + } + return NULL; +} + +// Try moving a store out of a loop, right after the loop +void PhaseIdealLoop::try_move_store_after_loop(Node* n) { + if (n->is_Store()) { + assert(n->in(0), "store should have control set"); + Node *n_ctrl = get_ctrl(n); + IdealLoopTree *n_loop = get_loop(n_ctrl); + // Store must be in a loop + if (n_loop != _ltree_root && !n_loop->_irreducible) { + Node* address = n->in(MemNode::Address); + Node* value = n->in(MemNode::ValueIn); + IdealLoopTree* address_loop = get_loop(get_ctrl(address)); + // address must be loop invariant + if (!n_loop->is_member(address_loop)) { + // Store must be last on this memory slice in the loop and + // nothing in the loop must observe it + Node* phi = NULL; + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + if (has_ctrl(u)) { // control use? + IdealLoopTree *u_loop = get_loop(get_ctrl(u)); + if (!n_loop->is_member(u_loop)) { + continue; + } + if (u->is_Phi() && u->in(0) == n_loop->_head) { + assert(_igvn.type(u) == Type::MEMORY, "bad phi"); + assert(phi == NULL, "already found"); + phi = u; + continue; + } + } + phi = NULL; + break; + } + if (phi != NULL) { + // Nothing in the loop before the store (next iteration) + // must observe the stored value + bool mem_ok = true; + { + ResourceMark rm; + Unique_Node_List wq; + wq.push(phi); + for (uint next = 0; next < wq.size() && mem_ok; ++next) { + Node *m = wq.at(next); + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) { + Node* u = m->fast_out(i); + if (u->is_Store() || u->is_Phi()) { + if (u != n) { + wq.push(u); + mem_ok = (wq.size() <= 10); + } + } else { + mem_ok = false; + break; + } + } + } + } + if (mem_ok) { + // Move the Store out of the loop creating clones along + // all paths out of the loop that observe the stored value + _igvn.rehash_node_delayed(phi); + int count = phi->replace_edge(n, n->in(MemNode::Memory)); + assert(count > 0, "inconsistent phi"); + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + Node* c = get_ctrl(u); + + if (u->is_Phi()) { + c = u->in(0)->in(u->find_edge(n)); + } + IdealLoopTree *u_loop = get_loop(c); + assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop"); + while(true) { + Node* next_c = find_non_split_ctrl(idom(c)); + if (n_loop->is_member(get_loop(next_c))) { + break; + } + c = next_c; + } + + Node* st = n->clone(); + st->set_req(0, c); + _igvn.register_new_node_with_optimizer(st); + + set_ctrl(st, c); + IdealLoopTree* new_loop = get_loop(c); + assert(new_loop != n_loop, "should be moved out of loop"); + if (new_loop->_child == NULL) new_loop->_body.push(st); + + _igvn.replace_input_of(u, u->find_edge(n), st); + --imax; + --i; + } + + + assert(n->outcnt() == 0, "all uses should be gone"); + _igvn.replace_input_of(n, MemNode::Memory, C->top()); + // Disconnect the phi now. An empty phi can confuse other + // optimizations in this pass of loop opts.. + if (phi->in(LoopNode::LoopBackControl) == phi) { + _igvn.replace_node(phi, phi->in(LoopNode::EntryControl)); + n_loop->_body.yank(phi); + } + } + } + } + } + } +} + //------------------------------split_if_with_blocks_pre----------------------- // Do the real work in a non-recursive function. 
Data nodes want to be // cloned in the pre-order so they can feed each other nicely. @@ -683,6 +886,11 @@ Node *n_ctrl = get_ctrl(n); if( !n_ctrl ) return n; // Dead node + Node* res = try_move_store_before_loop(n, n_ctrl); + if (res != NULL) { + return n; + } + // Attempt to remix address expressions for loop invariants Node *m = remix_address_expressions( n ); if( m ) return m; @@ -691,16 +899,18 @@ // Returns the block to clone thru. Node *n_blk = has_local_phi_input( n ); if( !n_blk ) return n; + // Do not clone the trip counter through on a CountedLoop // (messes up the canonical shape). if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n; // Check for having no control input; not pinned. Allow // dominating control. - if( n->in(0) ) { + if (n->in(0)) { Node *dom = idom(n_blk); - if( dom_lca( n->in(0), dom ) != n->in(0) ) + if (dom_lca(n->in(0), dom) != n->in(0)) { return n; + } } // Policy: when is it profitable. You must get more wins than // policy before it is considered profitable. Policy is usually 0, @@ -1029,6 +1239,8 @@ } } + try_move_store_after_loop(n); + // Check for Opaque2's who's loop has disappeared - who's input is in the // same loop nest as their output. Remove 'em, they are no longer useful. if( n_op == Op_Opaque2 && diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/macro.cpp --- a/hotspot/src/share/vm/opto/macro.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/macro.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -324,18 +324,28 @@ return in; } else if (in->is_Call()) { CallNode *call = in->as_Call(); - if (!call->may_modify(tinst, phase)) { - mem = call->in(TypeFunc::Memory); + if (call->may_modify(tinst, phase)) { + assert(call->is_ArrayCopy(), "ArrayCopy is the only call node that doesn't make allocation escape"); + + if (call->as_ArrayCopy()->modifies(offset, offset, phase, false)) { + return in; + } } mem = in->in(TypeFunc::Memory); } else if (in->is_MemBar()) { + if (ArrayCopyNode::may_modify(tinst, in->as_MemBar(), phase)) { + assert(in->in(0)->is_Proj() && in->in(0)->in(0)->is_ArrayCopy(), "should be arraycopy"); + ArrayCopyNode* ac = in->in(0)->in(0)->as_ArrayCopy(); + assert(ac->is_clonebasic(), "Only basic clone is a non escaping clone"); + return ac; + } mem = in->in(TypeFunc::Memory); } else { assert(false, "unexpected projection"); } } else if (mem->is_Store()) { const TypePtr* atype = mem->as_Store()->adr_type(); - int adr_idx = Compile::current()->get_alias_index(atype); + int adr_idx = phase->C->get_alias_index(atype); if (adr_idx == alias_idx) { assert(atype->isa_oopptr(), "address type must be oopptr"); int adr_offset = atype->offset(); @@ -373,7 +383,7 @@ adr = mem->in(3); // Destination array } const TypePtr* atype = adr->bottom_type()->is_ptr(); - int adr_idx = Compile::current()->get_alias_index(atype); + int adr_idx = phase->C->get_alias_index(atype); if (adr_idx == alias_idx) { assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); return NULL; @@ -386,12 +396,63 @@ } } +// Generate loads from source of the arraycopy for fields of +// destination needed at a deoptimization point +Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, BasicType ft, const Type *ftype, AllocateNode *alloc) { + BasicType bt = ft; + const Type *type = ftype; + if (ft == T_NARROWOOP) { + bt = T_OBJECT; + type = ftype->make_oopptr(); + } + Node* res = NULL; + if (ac->is_clonebasic()) { + Node* base = ac->in(ArrayCopyNode::Src)->in(AddPNode::Base); + Node* adr = 
_igvn.transform(new AddPNode(base, base, MakeConX(offset))); + const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset); + Node* m = ac->in(TypeFunc::Memory); + while (m->is_MergeMem()) { + m = m->as_MergeMem()->memory_at(C->get_alias_index(adr_type)); + if (m->is_Proj() && m->in(0)->is_MemBar()) { + m = m->in(0)->in(TypeFunc::Memory); + } + } + res = LoadNode::make(_igvn, ctl, m, adr, adr_type, type, bt, MemNode::unordered, LoadNode::Pinned); + } else { + if (ac->modifies(offset, offset, &_igvn, true)) { + assert(ac->in(ArrayCopyNode::Dest) == alloc->result_cast(), "arraycopy destination should be allocation's result"); + uint shift = exact_log2(type2aelembytes(bt)); + Node* diff = _igvn.transform(new SubINode(ac->in(ArrayCopyNode::SrcPos), ac->in(ArrayCopyNode::DestPos))); +#ifdef _LP64 + diff = _igvn.transform(new ConvI2LNode(diff)); +#endif + diff = _igvn.transform(new LShiftXNode(diff, intcon(shift))); + + Node* off = _igvn.transform(new AddXNode(MakeConX(offset), diff)); + Node* base = ac->in(ArrayCopyNode::Src); + Node* adr = _igvn.transform(new AddPNode(base, base, off)); + const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset); + Node* m = ac->in(TypeFunc::Memory); + res = LoadNode::make(_igvn, ctl, m, adr, adr_type, type, bt, MemNode::unordered, LoadNode::Pinned); + } + } + if (res != NULL) { + res = _igvn.transform(res); + if (ftype->isa_narrowoop()) { + // PhaseMacroExpand::scalar_replacement adds DecodeN nodes + res = _igvn.transform(new EncodePNode(res, ftype)); + } + return res; + } + return NULL; +} + // // Given a Memory Phi, compute a value Phi containing the values from stores // on the input paths. -// Note: this function is recursive, its depth is limied by the "level" argument +// Note: this function is recursive, its depth is limited by the "level" argument // Returns the computed Phi, or NULL if it cannot compute it. -Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, Node *alloc, Node_Stack *value_phis, int level) { +Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level) { assert(mem->is_Phi(), "sanity"); int alias_idx = C->get_alias_index(adr_t); int offset = adr_t->offset(); @@ -458,6 +519,12 @@ assert(val->in(0)->is_LoadStore() || val->in(0)->Opcode() == Op_EncodeISOArray, "sanity"); assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); return NULL; + } else if (val->is_ArrayCopy()) { + Node* res = make_arraycopy_load(val->as_ArrayCopy(), offset, val->in(0), ft, phi_type, alloc); + if (res == NULL) { + return NULL; + } + values.at_put(j, res); } else { #ifdef ASSERT val->dump(); @@ -479,7 +546,7 @@ } // Search the last value stored into the object's field. 
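make_arraycopy_load() above answers "what value does destination offset X hold at this deoptimization point?" by loading from the copy's source instead. For the real-arraycopy case it only rewrites the offset by the element distance between SrcPos and DestPos; a standalone model of that arithmetic (names invented, offsets in bytes and already including the array header):

    #include <cstdint>

    static std::intptr_t source_offset_for(std::intptr_t dest_offset_bytes,
                                           std::int64_t src_pos,
                                           std::int64_t dest_pos,
                                           int log2_elem_size) {
      std::int64_t diff = src_pos - dest_pos;                       // distance in elements
      return dest_offset_bytes +
             static_cast<std::intptr_t>(diff << log2_elem_size);    // same slot in the source
    }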
-Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc) { +Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, Node *sfpt_ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc) { assert(adr_t->is_known_instance_field(), "instance required"); int instance_id = adr_t->instance_id(); assert((uint)instance_id == alloc->_idx, "wrong allocation"); @@ -538,6 +605,8 @@ } else { done = true; } + } else if (mem->is_ArrayCopy()) { + done = true; } else { assert(false, "unexpected node"); } @@ -562,6 +631,13 @@ value_phis.pop(); } } + } else if (mem->is_ArrayCopy()) { + Node* ctl = mem->in(0); + if (sfpt_ctl->is_Proj() && sfpt_ctl->as_Proj()->is_uncommon_trap_proj(Deoptimization::Reason_none)) { + // pin the loads in the uncommon trap path + ctl = sfpt_ctl; + } + return make_arraycopy_load(mem->as_ArrayCopy(), offset, ctl, ft, ftype, alloc); } } // Something go wrong. @@ -738,6 +814,7 @@ while (safepoints.length() > 0) { SafePointNode* sfpt = safepoints.pop(); Node* mem = sfpt->memory(); + Node* ctl = sfpt->control(); assert(sfpt->jvms() != NULL, "missed JVMS"); // Fields of scalar objs are referenced only at the end // of regular debuginfo at the last (youngest) JVMS. @@ -789,7 +866,7 @@ const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr(); - Node *field_val = value_from_mem(mem, basic_elem_type, field_type, field_addr_type, alloc); + Node *field_val = value_from_mem(mem, ctl, basic_elem_type, field_type, field_addr_type, alloc); if (field_val == NULL) { // We weren't able to find a value for this field, // give up on eliminating this allocation. diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/macro.hpp --- a/hotspot/src/share/vm/opto/macro.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/macro.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -85,8 +85,8 @@ Node* length, const TypeFunc* slow_call_type, address slow_call_address); - Node *value_from_mem(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc); - Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc, Node_Stack *value_phis, int level); + Node *value_from_mem(Node *mem, Node *ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc); + Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level); bool eliminate_boxing_node(CallStaticJavaNode *boxing); bool eliminate_allocate_node(AllocateNode *alloc); @@ -200,6 +200,8 @@ Node* old_eden_top, Node* new_eden_top, Node* length); + Node* make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, BasicType ft, const Type *ftype, AllocateNode *alloc); + public: PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) { _igvn.set_delay_transform(true); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/memnode.cpp --- a/hotspot/src/share/vm/opto/memnode.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/memnode.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -108,37 +108,6 @@ #endif -static bool membar_for_arraycopy_helper(const TypeOopPtr *t_oop, Node* n, PhaseTransform *phase) { - if (n->is_Proj()) { - n = n->in(0); - if (n->is_Call() && n->as_Call()->may_modify(t_oop, phase)) { - return true; - } - } - return false; -} - -static bool membar_for_arraycopy(const TypeOopPtr *t_oop, MemBarNode* mb, 
PhaseTransform *phase) { - Node* mem = mb->in(TypeFunc::Memory); - - if (mem->is_MergeMem()) { - Node* n = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); - if (membar_for_arraycopy_helper(t_oop, n, phase)) { - return true; - } else if (n->is_Phi()) { - for (uint i = 1; i < n->req(); i++) { - if (n->in(i) != NULL) { - if (membar_for_arraycopy_helper(t_oop, n->in(i), phase)) { - return true; - } - } - } - } - } - - return false; -} - Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) { assert((t_oop != NULL), "sanity"); bool is_instance = t_oop->is_known_instance_field(); @@ -183,7 +152,7 @@ } } } else if (proj_in->is_MemBar()) { - if (membar_for_arraycopy(t_oop, proj_in->as_MemBar(), phase)) { + if (ArrayCopyNode::may_modify(t_oop, proj_in->as_MemBar(), phase)) { break; } result = proj_in->in(TypeFunc::Memory); @@ -545,35 +514,12 @@ Node* dest = ac->in(ArrayCopyNode::Dest); if (dest == ld_base) { - Node* src_pos = ac->in(ArrayCopyNode::SrcPos); - Node* dest_pos = ac->in(ArrayCopyNode::DestPos); - Node* len = ac->in(ArrayCopyNode::Length); - - const TypeInt *dest_pos_t = phase->type(dest_pos)->isa_int(); const TypeX *ld_offs_t = phase->type(ld_offs)->isa_intptr_t(); - const TypeInt *len_t = phase->type(len)->isa_int(); - const TypeAryPtr* ary_t = phase->type(dest)->isa_aryptr(); - - if (dest_pos_t != NULL && ld_offs_t != NULL && len_t != NULL && ary_t != NULL) { - BasicType ary_elem = ary_t->klass()->as_array_klass()->element_type()->basic_type(); - uint header = arrayOopDesc::base_offset_in_bytes(ary_elem); - uint elemsize = type2aelembytes(ary_elem); - - intptr_t dest_pos_plus_len_lo = (((intptr_t)dest_pos_t->_lo) + len_t->_lo) * elemsize + header; - intptr_t dest_pos_plus_len_hi = (((intptr_t)dest_pos_t->_hi) + len_t->_hi) * elemsize + header; - intptr_t dest_pos_lo = ((intptr_t)dest_pos_t->_lo) * elemsize + header; - intptr_t dest_pos_hi = ((intptr_t)dest_pos_t->_hi) * elemsize + header; - - if (can_see_stored_value) { - if (ld_offs_t->_lo >= dest_pos_hi && ld_offs_t->_hi < dest_pos_plus_len_lo) { - return ac; - } - } else { - if (ld_offs_t->_hi < dest_pos_lo || ld_offs_t->_lo >= dest_pos_plus_len_hi) { - mem = ac->in(TypeFunc::Memory); - } - return ac; - } + if (ac->modifies(ld_offs_t->_lo, ld_offs_t->_hi, phase, can_see_stored_value)) { + return ac; + } + if (!can_see_stored_value) { + mem = ac->in(TypeFunc::Memory); } } } @@ -703,7 +649,7 @@ continue; // (a) advance through independent call memory } } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) { - if (membar_for_arraycopy(addr_t, mem->in(0)->as_MemBar(), phase)) { + if (ArrayCopyNode::may_modify(addr_t, mem->in(0)->as_MemBar(), phase)) { break; } mem = mem->in(0)->in(TypeFunc::Memory); @@ -883,18 +829,17 @@ // Is the value loaded previously stored by an arraycopy? If so return // a load node that reads from the source array so we may be able to // optimize out the ArrayCopy node later. 
-Node* MemNode::can_see_arraycopy_value(Node* st, PhaseTransform* phase) const { +Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseTransform* phase) const { Node* ld_adr = in(MemNode::Address); intptr_t ld_off = 0; AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off); Node* ac = find_previous_arraycopy(phase, ld_alloc, st, true); if (ac != NULL) { assert(ac->is_ArrayCopy(), "what kind of node can this be?"); - assert(is_Load(), "only for loads"); - + + Node* ld = clone(); if (ac->as_ArrayCopy()->is_clonebasic()) { assert(ld_alloc != NULL, "need an alloc"); - Node* ld = clone(); Node* addp = in(MemNode::Address)->clone(); assert(addp->is_AddP(), "address must be addp"); assert(addp->in(AddPNode::Base) == ac->in(ArrayCopyNode::Dest)->in(AddPNode::Base), "strange pattern"); @@ -906,9 +851,7 @@ assert(ld_alloc->in(0) != NULL, "alloc must have control"); ld->set_req(0, ld_alloc->in(0)); } - return ld; } else { - Node* ld = clone(); Node* addp = in(MemNode::Address)->clone(); assert(addp->in(AddPNode::Base) == addp->in(AddPNode::Address), "should be"); addp->set_req(AddPNode::Base, ac->in(ArrayCopyNode::Src)); @@ -933,8 +876,10 @@ assert(ac->in(0) != NULL, "alloc must have control"); ld->set_req(0, ac->in(0)); } - return ld; } + // load depends on the tests that validate the arraycopy + ld->as_Load()->_depends_only_on_test = Pinned; + return ld; } return NULL; } @@ -2426,40 +2371,47 @@ Node* mem = in(MemNode::Memory); Node* address = in(MemNode::Address); - // Back-to-back stores to same address? Fold em up. Generally // unsafe if I have intervening uses... Also disallowed for StoreCM // since they must follow each StoreP operation. Redundant StoreCMs // are eliminated just before matching in final_graph_reshape. - if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) && - mem->Opcode() != Op_StoreCM) { - // Looking at a dead closed cycle of memory? - assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); - - assert(Opcode() == mem->Opcode() || - phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw, - "no mismatched stores, except on raw memory"); - - if (mem->outcnt() == 1 && // check for intervening uses - mem->as_Store()->memory_size() <= this->memory_size()) { - // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away. - // For example, 'mem' might be the final state at a conditional return. - // Or, 'mem' might be used by some node which is live at the same time - // 'this' is live, which might be unschedulable. So, require exactly - // ONE user, the 'this' store, until such time as we clone 'mem' for - // each of 'mem's uses (thus making the exactly-1-user-rule hold true). - if (can_reshape) { // (%%% is this an anachronism?) - set_req_X(MemNode::Memory, mem->in(MemNode::Memory), - phase->is_IterGVN()); - } else { - // It's OK to do this in the parser, since DU info is always accurate, - // and the parser always refers to nodes via SafePointNode maps. - set_req(MemNode::Memory, mem->in(MemNode::Memory)); + { + Node* st = mem; + // If Store 'st' has more than one use, we cannot fold 'st' away. + // For example, 'st' might be the final state at a conditional + // return. Or, 'st' might be used by some node which is live at + // the same time 'st' is live, which might be unschedulable. So, + // require exactly ONE user until such time as we clone 'mem' for + // each of 'mem's uses (thus making the exactly-1-user-rule hold + // true). 
+ while (st->is_Store() && st->outcnt() == 1 && st->Opcode() != Op_StoreCM) { + // Looking at a dead closed cycle of memory? + assert(st != st->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); + assert(Opcode() == st->Opcode() || + st->Opcode() == Op_StoreVector || + Opcode() == Op_StoreVector || + phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw || + (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode + err_msg_res("no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()])); + + if (st->in(MemNode::Address)->eqv_uncast(address) && + st->as_Store()->memory_size() <= this->memory_size()) { + Node* use = st->raw_out(0); + phase->igvn_rehash_node_delayed(use); + if (can_reshape) { + use->set_req_X(MemNode::Memory, st->in(MemNode::Memory), phase->is_IterGVN()); + } else { + // It's OK to do this in the parser, since DU info is always accurate, + // and the parser always refers to nodes via SafePointNode maps. + use->set_req(MemNode::Memory, st->in(MemNode::Memory)); + } + return this; } - return this; + st = st->in(MemNode::Memory); } } + // Capture an unaliased, unconditional, simple store into an initializer. // Or, if it is independent of the allocation, hoist it above the allocation. if (ReduceFieldZeroing && /*can_reshape &&*/ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/opto/memnode.hpp --- a/hotspot/src/share/vm/opto/memnode.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/opto/memnode.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -126,7 +126,6 @@ // Can this node (load or store) accurately see a stored value in // the given memory state? (The state may or may not be in(Memory).) Node* can_see_stored_value(Node* st, PhaseTransform* phase) const; - Node* can_see_arraycopy_value(Node* st, PhaseTransform* phase) const; #ifndef PRODUCT static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st); @@ -252,6 +251,9 @@ protected: const Type* load_array_final_field(const TypeKlassPtr *tkls, ciKlass* klass) const; + + Node* can_see_arraycopy_value(Node* st, PhaseTransform* phase) const; + // depends_only_on_test is almost always true, and needs to be almost always // true to enable key hoisting & commoning optimizations. 
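The StoreNode::Ideal rewrite above generalizes back-to-back store elimination: instead of only inspecting the immediately preceding store, it walks up a chain of single-use stores to the same address and drops the earlier, dead one. At the source level this is ordinary dead-store removal, roughly (field names invented):

    // Illustration only.
    struct Obj { int f; };

    static void original(Obj* o) {
      o->f = 1;    // dead: overwritten below with no intervening read, and the
      o->f = 2;    // earlier store has exactly one memory use, so it can be folded away
    }

    static void transformed(Obj* o) {
      o->f = 2;
    }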
However, for the // special case of RawPtr loads from TLS top & end, and other loads performed by diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp --- a/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -3771,7 +3771,7 @@ // Deoptimize all activations depending on marked nmethods Deoptimization::deoptimize_dependents(); - // Make the dependent methods not entrant (in VM_Deoptimize they are made zombies) + // Make the dependent methods not entrant CodeCache::make_marked_nmethods_not_entrant(); // From now on we know that the dependency information is complete diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/arguments.cpp --- a/hotspot/src/share/vm/runtime/arguments.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/arguments.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -81,8 +81,6 @@ bool Arguments::_has_profile = false; size_t Arguments::_conservative_max_heap_alignment = 0; size_t Arguments::_min_heap_size = 0; -uintx Arguments::_min_heap_free_ratio = 0; -uintx Arguments::_max_heap_free_ratio = 0; Arguments::Mode Arguments::_mode = _mixed; bool Arguments::_java_compiler = false; bool Arguments::_xdebug_mode = false; @@ -1614,11 +1612,9 @@ // unless the user actually sets these flags. if (FLAG_IS_DEFAULT(MinHeapFreeRatio)) { FLAG_SET_DEFAULT(MinHeapFreeRatio, 0); - _min_heap_free_ratio = MinHeapFreeRatio; } if (FLAG_IS_DEFAULT(MaxHeapFreeRatio)) { FLAG_SET_DEFAULT(MaxHeapFreeRatio, 100); - _max_heap_free_ratio = MaxHeapFreeRatio; } } @@ -3978,15 +3974,6 @@ return JNI_OK; } -// Any custom code post the 'CommandLineFlagConstraint::AfterErgo' constraint check -// can be done here. We pass a flag that specifies whether -// the check passed successfully -void Arguments::post_after_ergo_constraint_check(bool check_passed) { - // This does not set the flag itself, but stores the value in a safe place for later usage. 
- _min_heap_free_ratio = MinHeapFreeRatio; - _max_heap_free_ratio = MaxHeapFreeRatio; -} - int Arguments::PropertyList_count(SystemProperty* pl) { int count = 0; while(pl != NULL) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/arguments.hpp --- a/hotspot/src/share/vm/runtime/arguments.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/arguments.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -288,10 +288,6 @@ static uintx _min_heap_size; - // Used to store original flag values - static uintx _min_heap_free_ratio; - static uintx _max_heap_free_ratio; - // -Xrun arguments static AgentLibraryList _libraryList; static void add_init_library(const char* name, char* options) @@ -463,8 +459,6 @@ static jint apply_ergo(); // Adjusts the arguments after the OS have adjusted the arguments static jint adjust_after_os(); - // Set any arguments that need to be set after the 'CommandLineFlagConstraint::AfterErgo' constraint check - static void post_after_ergo_constraint_check(bool check_passed); static void set_gc_specific_flags(); static inline bool gc_selected(); // whether a gc has been selected @@ -526,10 +520,6 @@ static size_t min_heap_size() { return _min_heap_size; } static void set_min_heap_size(size_t v) { _min_heap_size = v; } - // Returns the original values of -XX:MinHeapFreeRatio and -XX:MaxHeapFreeRatio - static uintx min_heap_free_ratio() { return _min_heap_free_ratio; } - static uintx max_heap_free_ratio() { return _max_heap_free_ratio; } - // -Xrun static AgentLibrary* libraries() { return _libraryList.first(); } static bool init_libraries_at_startup() { return !_libraryList.is_empty(); } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintList.cpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintList.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintList.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -45,8 +45,8 @@ _constraint=func; } - Flag::Error apply_bool(bool* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_bool(bool value, bool verbose) { + return _constraint(value, verbose); } }; @@ -61,8 +61,8 @@ _constraint=func; } - Flag::Error apply_int(int* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_int(int value, bool verbose) { + return _constraint(value, verbose); } }; @@ -77,8 +77,8 @@ _constraint=func; } - Flag::Error apply_intx(intx* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_intx(intx value, bool verbose) { + return _constraint(value, verbose); } }; @@ -93,8 +93,8 @@ _constraint=func; } - Flag::Error apply_uint(uint* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_uint(uint value, bool verbose) { + return _constraint(value, verbose); } }; @@ -109,8 +109,8 @@ _constraint=func; } - Flag::Error apply_uintx(uintx* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_uintx(uintx value, bool verbose) { + return _constraint(value, verbose); } }; @@ -125,8 +125,8 @@ _constraint=func; } - Flag::Error apply_uint64_t(uint64_t* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_uint64_t(uint64_t value, bool verbose) { + return _constraint(value, verbose); } }; @@ -141,8 +141,8 @@ _constraint=func; } - Flag::Error apply_size_t(size_t* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_size_t(size_t value, bool verbose) { + return _constraint(value, 
verbose); } }; @@ -157,8 +157,8 @@ _constraint=func; } - Flag::Error apply_double(double* value, bool verbose) { - return _constraint(verbose, value); + Flag::Error apply_double(double value, bool verbose) { + return _constraint(value, verbose); } }; @@ -226,7 +226,6 @@ // Check the ranges of all flags that have them or print them out and exit if requested void CommandLineFlagConstraintList::init(void) { - _constraints = new (ResourceObj::C_HEAP, mtInternal) GrowableArray(INITIAL_CONSTRAINTS_SIZE, true); emit_constraint_no(NULL RUNTIME_FLAGS(EMIT_CONSTRAINT_DEVELOPER_FLAG, @@ -306,40 +305,6 @@ // Check constraints for specific constraint type. bool CommandLineFlagConstraintList::check_constraints(CommandLineFlagConstraint::ConstraintType type) { -//#define PRINT_CONSTRAINTS_SIZES -#ifdef PRINT_CONSTRAINTS_SIZES - { - size_t size_constraints = sizeof(CommandLineFlagConstraintList); - for (int i=0; iname(); - Flag* flag = Flag::find_flag(name, strlen(name), true, true); - if (flag->is_bool()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_bool); - size_constraints += sizeof(CommandLineFlagConstraint*); - } else if (flag->is_intx()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_intx); - size_constraints += sizeof(CommandLineFlagConstraint*); - } else if (flag->is_uintx()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_uintx); - size_constraints += sizeof(CommandLineFlagConstraint*); - } else if (flag->is_uint64_t()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_uint64_t); - size_constraints += sizeof(CommandLineFlagConstraint*); - } else if (flag->is_size_t()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_size_t); - size_constraints += sizeof(CommandLineFlagConstraint*); - } else if (flag->is_double()) { - size_constraints += sizeof(CommandLineFlagConstraintFunc_double); - size_constraints += sizeof(CommandLineFlagConstraint*); - } - } - fprintf(stderr, "Size of %d constraints: " SIZE_FORMAT " bytes\n", - length(), size_constraints); - } -#endif // PRINT_CONSTRAINTS_SIZES - // Skip if we already checked. 
if (type < _validating_type) { return true; @@ -350,27 +315,36 @@ for (int i=0; itype()) continue; - const char*name = constraint->name(); + const char* name = constraint->name(); Flag* flag = Flag::find_flag(name, strlen(name), true, true); + // We must check for NULL here as lp64_product flags on 32 bit architecture + // can generate constraint check (despite that they are declared as constants), + // but they will not be returned by Flag::find_flag() if (flag != NULL) { if (flag->is_bool()) { bool value = flag->get_bool(); - if (constraint->apply_bool(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_bool(value, true) != Flag::SUCCESS) status = false; + } else if (flag->is_int()) { + int value = flag->get_int(); + if (constraint->apply_int(value, true) != Flag::SUCCESS) status = false; + } else if (flag->is_uint()) { + uint value = flag->get_uint(); + if (constraint->apply_uint(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_intx()) { intx value = flag->get_intx(); - if (constraint->apply_intx(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_intx(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_uintx()) { uintx value = flag->get_uintx(); - if (constraint->apply_uintx(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_uintx(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_uint64_t()) { uint64_t value = flag->get_uint64_t(); - if (constraint->apply_uint64_t(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_uint64_t(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_size_t()) { size_t value = flag->get_size_t(); - if (constraint->apply_size_t(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_size_t(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_double()) { double value = flag->get_double(); - if (constraint->apply_double(&value, true) != Flag::SUCCESS) status = false; + if (constraint->apply_double(value, true) != Flag::SUCCESS) status = false; } } } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintList.hpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintList.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintList.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -39,14 +39,14 @@ * "runtime/commandLineFlagConstraintsRuntime.hpp" for the functions themselves. 
*/ -typedef Flag::Error (*CommandLineFlagConstraintFunc_bool)(bool verbose, bool* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_int)(bool verbose, int* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_intx)(bool verbose, intx* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_uint)(bool verbose, uint* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_uintx)(bool verbose, uintx* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_uint64_t)(bool verbose, uint64_t* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_size_t)(bool verbose, size_t* value); -typedef Flag::Error (*CommandLineFlagConstraintFunc_double)(bool verbose, double* value); +typedef Flag::Error (*CommandLineFlagConstraintFunc_bool)(bool value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_int)(int value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_intx)(intx value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_uint)(uint value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_uintx)(uintx value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_uint64_t)(uint64_t value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_size_t)(size_t value, bool verbose); +typedef Flag::Error (*CommandLineFlagConstraintFunc_double)(double value, bool verbose); class CommandLineFlagConstraint : public CHeapObj { public: @@ -70,14 +70,14 @@ ~CommandLineFlagConstraint() {}; const char* name() const { return _name; } ConstraintType type() const { return _validate_type; } - virtual Flag::Error apply_bool(bool* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_int(int* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_intx(intx* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_uint(uint* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_uintx(uintx* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_uint64_t(uint64_t* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_size_t(size_t* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; - virtual Flag::Error apply_double(double* value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_bool(bool value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_int(int value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_intx(intx value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_uint(uint value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_uintx(uintx value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_uint64_t(uint64_t value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_size_t(size_t value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; + virtual Flag::Error apply_double(double value, bool verbose = true) { ShouldNotReachHere(); return Flag::ERR_OTHER; }; }; class 
CommandLineFlagConstraintList : public AllStatic { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -25,17 +25,16 @@ #include "precompiled.hpp" #include "runtime/arguments.hpp" #include "runtime/commandLineFlagConstraintsCompiler.hpp" +#include "runtime/commandLineFlagRangeList.hpp" #include "runtime/globals.hpp" #include "utilities/defaultStream.hpp" -Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value) { - if ((*value <= 1) && (Arguments::mode() == Arguments::_comp)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "AliasLevel (" INTX_FORMAT ") is not compatible " - "with -Xcomp \n", - *value); - } +Flag::Error AliasLevelConstraintFunc(intx value, bool verbose) { + if ((value <= 1) && (Arguments::mode() == Arguments::_comp)) { + CommandLineError::print(verbose, + "AliasLevel (" INTX_FORMAT ") is not " + "compatible with -Xcomp \n", + value); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; @@ -57,7 +56,7 @@ * 'TieredStopAtLevel = CompLevel_full_optimization' (the default value). As a result, * the minimum number of compiler threads is 2. */ -Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value) { +Flag::Error CICompilerCountConstraintFunc(intx value, bool verbose) { int min_number_of_compiler_threads = 0; #if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK) // case 1 @@ -75,12 +74,11 @@ // min_number_of_compiler_threads to exceed CI_COMPILER_COUNT. min_number_of_compiler_threads = MIN2(min_number_of_compiler_threads, CI_COMPILER_COUNT); - if (*value < (intx)min_number_of_compiler_threads) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "CICompilerCount=" INTX_FORMAT " must be at least %d \n", - *value, min_number_of_compiler_threads); - } + if (value < (intx)min_number_of_compiler_threads) { + CommandLineError::print(verbose, + "CICompilerCount (" INTX_FORMAT ") must be " + "at least %d \n", + value, min_number_of_compiler_threads); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -34,8 +34,8 @@ * an appropriate error value. 
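// Illustrative sketch, not part of the changeset: the mechanical edit applied to every
// constraint function in this patch replaces the open-coded verbosity guard
//
//   if (verbose == true) {
//     jio_fprintf(defaultStream::error_stream(),
//                 "AliasLevel (" INTX_FORMAT ") is not compatible with -Xcomp \n", *value);
//   }
//
// with a single call that performs the same check internally and forwards the varargs:
//
//   CommandLineError::print(verbose,
//                           "AliasLevel (" INTX_FORMAT ") is not compatible with -Xcomp \n",
//                           value);
//
// CommandLineError::print is introduced later in this changeset, in
// commandLineFlagRangeList.cpp, as a thin wrapper around jio_vfprintf on the error stream.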
*/ -Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value); +Flag::Error AliasLevelConstraintFunc(intx value, bool verbose); -Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value); +Flag::Error CICompilerCountConstraintFunc(intx value, bool verbose); #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSCOMPILER_HPP */ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.cpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "runtime/arguments.hpp" #include "runtime/commandLineFlagConstraintsGC.hpp" +#include "runtime/commandLineFlagRangeList.hpp" #include "runtime/globals.hpp" #include "utilities/defaultStream.hpp" @@ -41,97 +42,85 @@ #include "opto/c2_globals.hpp" #endif // COMPILER2 -static Flag::Error MinPLABSizeBounds(const char* name, bool verbose, size_t* value) { +static Flag::Error MinPLABSizeBounds(const char* name, size_t value, bool verbose) { #if INCLUDE_ALL_GCS - if ((UseConcMarkSweepGC || UseG1GC) && (*value < PLAB::min_size())) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "%s (" SIZE_FORMAT ") must be greater than " - "ergonomic PLAB minimum size (" SIZE_FORMAT ")\n", - name, *value, PLAB::min_size()); - } + if ((UseConcMarkSweepGC || UseG1GC) && (value < PLAB::min_size())) { + CommandLineError::print(verbose, + "%s (" SIZE_FORMAT ") must be " + "greater than or equal to ergonomic PLAB minimum size (" SIZE_FORMAT ")\n", + name, value, PLAB::min_size()); return Flag::VIOLATES_CONSTRAINT; } #endif // INCLUDE_ALL_GCS return Flag::SUCCESS; } -static Flag::Error MaxPLABSizeBounds(const char* name, bool verbose, size_t* value) { +static Flag::Error MaxPLABSizeBounds(const char* name, size_t value, bool verbose) { #if INCLUDE_ALL_GCS - if ((UseConcMarkSweepGC || UseG1GC) && (*value > PLAB::max_size())) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "%s (" SIZE_FORMAT ") must be less than " - "ergonomic PLAB maximum size (" SIZE_FORMAT ")\n", - name, *value, PLAB::max_size()); - } + if ((UseConcMarkSweepGC || UseG1GC) && (value > PLAB::max_size())) { + CommandLineError::print(verbose, + "%s (" SIZE_FORMAT ") must be " + "less than ergonomic PLAB maximum size (" SIZE_FORMAT ")\n", + name, value, PLAB::min_size()); return Flag::VIOLATES_CONSTRAINT; } #endif // INCLUDE_ALL_GCS return Flag::SUCCESS; } -static Flag::Error MinMaxPLABSizeBounds(const char* name, bool verbose, size_t* value) { - if (MinPLABSizeBounds(name, verbose, value) == Flag::SUCCESS) { - return MaxPLABSizeBounds(name, verbose, value); +static Flag::Error MinMaxPLABSizeBounds(const char* name, size_t value, bool verbose) { + if (MinPLABSizeBounds(name, value, verbose) == Flag::SUCCESS) { + return MaxPLABSizeBounds(name, value, verbose); } return Flag::VIOLATES_CONSTRAINT; } -Flag::Error YoungPLABSizeConstraintFunc(bool verbose, size_t* value) { - return MinMaxPLABSizeBounds("YoungPLABSize", verbose, value); +Flag::Error YoungPLABSizeConstraintFunc(size_t value, bool verbose) { + return MinMaxPLABSizeBounds("YoungPLABSize", value, verbose); } -Flag::Error MinHeapFreeRatioConstraintFunc(bool verbose, uintx* value) { - if (*value > MaxHeapFreeRatio) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "MinHeapFreeRatio (" UINTX_FORMAT ") must be less than or " - "equal to 
MaxHeapFreeRatio (" UINTX_FORMAT ")\n", - *value, MaxHeapFreeRatio); - } +Flag::Error MinHeapFreeRatioConstraintFunc(uintx value, bool verbose) { + if (value > MaxHeapFreeRatio) { + CommandLineError::print(verbose, + "MinHeapFreeRatio (" UINTX_FORMAT ") must be " + "less than or equal to MaxHeapFreeRatio (" UINTX_FORMAT ")\n", + value, MaxHeapFreeRatio); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error MaxHeapFreeRatioConstraintFunc(bool verbose, uintx* value) { - if (*value < MinHeapFreeRatio) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "MaxHeapFreeRatio (" UINTX_FORMAT ") must be greater than or " - "equal to MinHeapFreeRatio (" UINTX_FORMAT ")\n", - *value, MinHeapFreeRatio); - } +Flag::Error MaxHeapFreeRatioConstraintFunc(uintx value, bool verbose) { + if (value < MinHeapFreeRatio) { + CommandLineError::print(verbose, + "MaxHeapFreeRatio (" UINTX_FORMAT ") must be " + "greater than or equal to MinHeapFreeRatio (" UINTX_FORMAT ")\n", + value, MinHeapFreeRatio); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error MinMetaspaceFreeRatioConstraintFunc(bool verbose, uintx* value) { - if (*value > MaxMetaspaceFreeRatio) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "MinMetaspaceFreeRatio (" UINTX_FORMAT ") must be less than or " - "equal to MaxMetaspaceFreeRatio (" UINTX_FORMAT ")\n", - *value, MaxMetaspaceFreeRatio); - } +Flag::Error MinMetaspaceFreeRatioConstraintFunc(uintx value, bool verbose) { + if (value > MaxMetaspaceFreeRatio) { + CommandLineError::print(verbose, + "MinMetaspaceFreeRatio (" UINTX_FORMAT ") must be " + "less than or equal to MaxMetaspaceFreeRatio (" UINTX_FORMAT ")\n", + value, MaxMetaspaceFreeRatio); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error MaxMetaspaceFreeRatioConstraintFunc(bool verbose, uintx* value) { - if (*value < MinMetaspaceFreeRatio) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "MaxMetaspaceFreeRatio (" UINTX_FORMAT ") must be greater than or " - "equal to MinMetaspaceFreeRatio (" UINTX_FORMAT ")\n", - *value, MinMetaspaceFreeRatio); - } +Flag::Error MaxMetaspaceFreeRatioConstraintFunc(uintx value, bool verbose) { + if (value < MinMetaspaceFreeRatio) { + CommandLineError::print(verbose, + "MaxMetaspaceFreeRatio (" UINTX_FORMAT ") must be " + "greater than or equal to MinMetaspaceFreeRatio (" UINTX_FORMAT ")\n", + value, MinMetaspaceFreeRatio); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; @@ -147,32 +136,28 @@ } \ } -Flag::Error InitialTenuringThresholdConstraintFunc(bool verbose, uintx* value) { - UseConcMarkSweepGCWorkaroundIfNeeded(*value, MaxTenuringThreshold); +Flag::Error InitialTenuringThresholdConstraintFunc(uintx value, bool verbose) { + UseConcMarkSweepGCWorkaroundIfNeeded(value, MaxTenuringThreshold); - if (*value > MaxTenuringThreshold) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "InitialTenuringThreshold (" UINTX_FORMAT ") must be less than or " - "equal to MaxTenuringThreshold (" UINTX_FORMAT ")\n", - *value, MaxTenuringThreshold); - } + if (value > MaxTenuringThreshold) { + CommandLineError::print(verbose, + "InitialTenuringThreshold (" UINTX_FORMAT ") must be " + "less than or equal to MaxTenuringThreshold (" UINTX_FORMAT ")\n", + value, MaxTenuringThreshold); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error MaxTenuringThresholdConstraintFunc(bool verbose, uintx* value) { - 
UseConcMarkSweepGCWorkaroundIfNeeded(InitialTenuringThreshold, *value); +Flag::Error MaxTenuringThresholdConstraintFunc(uintx value, bool verbose) { + UseConcMarkSweepGCWorkaroundIfNeeded(InitialTenuringThreshold, value); - if (*value < InitialTenuringThreshold) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "MaxTenuringThreshold (" UINTX_FORMAT ") must be greater than or " - "equal to InitialTenuringThreshold (" UINTX_FORMAT ")\n", - *value, InitialTenuringThreshold); - } + if (value < InitialTenuringThreshold) { + CommandLineError::print(verbose, + "MaxTenuringThreshold (" UINTX_FORMAT ") must be " + "greater than or equal to InitialTenuringThreshold (" UINTX_FORMAT ")\n", + value, InitialTenuringThreshold); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; @@ -180,28 +165,24 @@ } #if INCLUDE_ALL_GCS -Flag::Error G1NewSizePercentConstraintFunc(bool verbose, uintx* value) { - if (*value > G1MaxNewSizePercent) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "G1NewSizePercent (" UINTX_FORMAT ") must be less than or " - "equal to G1MaxNewSizePercent (" UINTX_FORMAT ")\n", - *value, G1MaxNewSizePercent); - } +Flag::Error G1NewSizePercentConstraintFunc(uintx value, bool verbose) { + if (value > G1MaxNewSizePercent) { + CommandLineError::print(verbose, + "G1NewSizePercent (" UINTX_FORMAT ") must be " + "less than or equal to G1MaxNewSizePercent (" UINTX_FORMAT ")\n", + value, G1MaxNewSizePercent); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error G1MaxNewSizePercentConstraintFunc(bool verbose, uintx* value) { - if (*value < G1NewSizePercent) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "G1MaxNewSizePercent (" UINTX_FORMAT ") must be greater than or " - "equal to G1NewSizePercent (" UINTX_FORMAT ")\n", - *value, G1NewSizePercent); - } +Flag::Error G1MaxNewSizePercentConstraintFunc(uintx value, bool verbose) { + if (value < G1NewSizePercent) { + CommandLineError::print(verbose, + "G1MaxNewSizePercent (" UINTX_FORMAT ") must be " + "greater than or equal to G1NewSizePercent (" UINTX_FORMAT ")\n", + value, G1NewSizePercent); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; @@ -210,65 +191,56 @@ #endif // INCLUDE_ALL_GCS -Flag::Error CMSOldPLABMinConstraintFunc(bool verbose, size_t* value) { - if (*value > CMSOldPLABMax) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "CMSOldPLABMin (" SIZE_FORMAT ") must be less than or " - "equal to CMSOldPLABMax (" SIZE_FORMAT ")\n", - *value, CMSOldPLABMax); - } +Flag::Error CMSOldPLABMinConstraintFunc(size_t value, bool verbose) { + if (value > CMSOldPLABMax) { + CommandLineError::print(verbose, + "CMSOldPLABMin (" SIZE_FORMAT ") must be " + "less than or equal to CMSOldPLABMax (" SIZE_FORMAT ")\n", + value, CMSOldPLABMax); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error CMSPrecleanDenominatorConstraintFunc(bool verbose, uintx* value) { - if (*value <= CMSPrecleanNumerator) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "CMSPrecleanDenominator (" UINTX_FORMAT ") must be strickly greater than " - "CMSPrecleanNumerator (" UINTX_FORMAT ")\n", - *value, CMSPrecleanNumerator); - } +Flag::Error CMSPrecleanDenominatorConstraintFunc(uintx value, bool verbose) { + if (value <= CMSPrecleanNumerator) { + CommandLineError::print(verbose, + "CMSPrecleanDenominator (" UINTX_FORMAT ") must be " + "strickly greater than CMSPrecleanNumerator (" 
UINTX_FORMAT ")\n", + value, CMSPrecleanNumerator); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error CMSPrecleanNumeratorConstraintFunc(bool verbose, uintx* value) { - if (*value > (CMSPrecleanDenominator - 1)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "CMSPrecleanNumerator (" UINTX_FORMAT ") must be less than or " - "equal to CMSPrecleanDenominator - 1 (" UINTX_FORMAT ")\n", *value, - CMSPrecleanDenominator - 1); - } +Flag::Error CMSPrecleanNumeratorConstraintFunc(uintx value, bool verbose) { + if (value > (CMSPrecleanDenominator - 1)) { + CommandLineError::print(verbose, + "CMSPrecleanNumerator (" UINTX_FORMAT ") must be " + "less than or equal to CMSPrecleanDenominator - 1 (" UINTX_FORMAT ")\n", + value, CMSPrecleanDenominator - 1); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; } } -Flag::Error SurvivorAlignmentInBytesConstraintFunc(bool verbose, intx* value) { - if (*value != 0) { - if (!is_power_of_2(*value)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "SurvivorAlignmentInBytes (" INTX_FORMAT ") must be power of 2\n", - *value); - } +Flag::Error SurvivorAlignmentInBytesConstraintFunc(intx value, bool verbose) { + if (value != 0) { + if (!is_power_of_2(value)) { + CommandLineError::print(verbose, + "SurvivorAlignmentInBytes (" INTX_FORMAT ") must be " + "power of 2\n", + value); return Flag::VIOLATES_CONSTRAINT; } - if (*value < ObjectAlignmentInBytes) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "SurvivorAlignmentInBytes (" INTX_FORMAT ") must be greater than or " - "equal to ObjectAlignmentInBytes (" INTX_FORMAT ")\n", - *value, ObjectAlignmentInBytes); - } + if (value < ObjectAlignmentInBytes) { + CommandLineError::print(verbose, + "SurvivorAlignmentInBytes (" INTX_FORMAT ") must be " + "greater than or equal to ObjectAlignmentInBytes (" INTX_FORMAT ")\n", + value, ObjectAlignmentInBytes); return Flag::VIOLATES_CONSTRAINT; } } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.hpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsGC.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -34,27 +34,27 @@ * an appropriate error value. 
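// Illustrative sketch, not part of the changeset: most of the GC constraints above come in
// symmetric pairs (MinHeapFreeRatio/MaxHeapFreeRatio, InitialTenuringThreshold/
// MaxTenuringThreshold, and so on). Each side re-checks the inequality against the other
// flag's current value because either flag may be the one being set on the command line.
// A hypothetical pair "MinFoo"/"MaxFoo" would follow the same shape (error reporting via
// CommandLineError::print omitted for brevity):
//
//   Flag::Error MinFooConstraintFunc(uintx value, bool verbose) {
//     return (value > MaxFoo) ? Flag::VIOLATES_CONSTRAINT : Flag::SUCCESS;
//   }
//   Flag::Error MaxFooConstraintFunc(uintx value, bool verbose) {
//     return (value < MinFoo) ? Flag::VIOLATES_CONSTRAINT : Flag::SUCCESS;
//   }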
*/ -Flag::Error YoungPLABSizeConstraintFunc(bool verbose, size_t* value); +Flag::Error YoungPLABSizeConstraintFunc(size_t value, bool verbose); -Flag::Error MinHeapFreeRatioConstraintFunc(bool verbose, uintx* value); -Flag::Error MaxHeapFreeRatioConstraintFunc(bool verbose, uintx* value); +Flag::Error MinHeapFreeRatioConstraintFunc(uintx value, bool verbose); +Flag::Error MaxHeapFreeRatioConstraintFunc(uintx value, bool verbose); -Flag::Error MinMetaspaceFreeRatioConstraintFunc(bool verbose, uintx* value); -Flag::Error MaxMetaspaceFreeRatioConstraintFunc(bool verbose, uintx* value); +Flag::Error MinMetaspaceFreeRatioConstraintFunc(uintx value, bool verbose); +Flag::Error MaxMetaspaceFreeRatioConstraintFunc(uintx value, bool verbose); -Flag::Error InitialTenuringThresholdConstraintFunc(bool verbose, uintx* value); -Flag::Error MaxTenuringThresholdConstraintFunc(bool verbose, uintx* value); +Flag::Error InitialTenuringThresholdConstraintFunc(uintx value, bool verbose); +Flag::Error MaxTenuringThresholdConstraintFunc(uintx value, bool verbose); #if INCLUDE_ALL_GCS -Flag::Error G1NewSizePercentConstraintFunc(bool verbose, uintx* value); -Flag::Error G1MaxNewSizePercentConstraintFunc(bool verbose, uintx* value); +Flag::Error G1NewSizePercentConstraintFunc(uintx value, bool verbose); +Flag::Error G1MaxNewSizePercentConstraintFunc(uintx value, bool verbose); #endif // INCLUDE_ALL_GCS -Flag::Error CMSOldPLABMinConstraintFunc(bool verbose, size_t* value); +Flag::Error CMSOldPLABMinConstraintFunc(size_t value, bool verbose); -Flag::Error CMSPrecleanDenominatorConstraintFunc(bool verbose, uintx* value); -Flag::Error CMSPrecleanNumeratorConstraintFunc(bool verbose, uintx* value); +Flag::Error CMSPrecleanDenominatorConstraintFunc(uintx value, bool verbose); +Flag::Error CMSPrecleanNumeratorConstraintFunc(uintx value, bool verbose); -Flag::Error SurvivorAlignmentInBytesConstraintFunc(bool verbose, intx* value); +Flag::Error SurvivorAlignmentInBytesConstraintFunc(intx value, bool verbose); #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSGC_HPP */ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.cpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -25,25 +25,24 @@ #include "precompiled.hpp" #include "runtime/arguments.hpp" #include "runtime/commandLineFlagConstraintsRuntime.hpp" +#include "runtime/commandLineFlagRangeList.hpp" #include "runtime/globals.hpp" #include "utilities/defaultStream.hpp" -Flag::Error ObjectAlignmentInBytesConstraintFunc(bool verbose, intx* value) { - if (!is_power_of_2(*value)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "ObjectAlignmentInBytes=" INTX_FORMAT " must be power of 2\n", - *value); - } +Flag::Error ObjectAlignmentInBytesConstraintFunc(intx value, bool verbose) { + if (!is_power_of_2(value)) { + CommandLineError::print(verbose, + "ObjectAlignmentInBytes (" INTX_FORMAT ") must be " + "power of 2\n", + value); return Flag::VIOLATES_CONSTRAINT; } // In case page size is very small. 
- if (*value >= (intx)os::vm_page_size()) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "ObjectAlignmentInBytes=" INTX_FORMAT " must be less than page size " INTX_FORMAT "\n", - *value, (intx)os::vm_page_size()); - } + if (value >= (intx)os::vm_page_size()) { + CommandLineError::print(verbose, + "ObjectAlignmentInBytes (" INTX_FORMAT ") must be " + "less than page size " INTX_FORMAT "\n", + value, (intx)os::vm_page_size()); return Flag::VIOLATES_CONSTRAINT; } return Flag::SUCCESS; @@ -51,13 +50,12 @@ // Need to enforce the padding not to break the existing field alignments. // It is sufficient to check against the largest type size. -Flag::Error ContendedPaddingWidthConstraintFunc(bool verbose, intx* value) { - if ((*value != 0) && ((*value % BytesPerLong) != 0)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "ContendedPaddingWidth=" INTX_FORMAT " must be a multiple of %d\n", - *value, BytesPerLong); - } +Flag::Error ContendedPaddingWidthConstraintFunc(intx value, bool verbose) { + if ((value != 0) && ((value % BytesPerLong) != 0)) { + CommandLineError::print(verbose, + "ContendedPaddingWidth (" INTX_FORMAT ") must be " + "a multiple of %d\n", + value, BytesPerLong); return Flag::VIOLATES_CONSTRAINT; } else { return Flag::SUCCESS; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.hpp --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsRuntime.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -34,8 +34,8 @@ * an appropriate error value. */ -Flag::Error ObjectAlignmentInBytesConstraintFunc(bool verbose, intx* value); +Flag::Error ObjectAlignmentInBytesConstraintFunc(intx value, bool verbose); -Flag::Error ContendedPaddingWidthConstraintFunc(bool verbose, intx* value); +Flag::Error ContendedPaddingWidthConstraintFunc(intx value, bool verbose); #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSRUNTIME_HPP */ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagRangeList.cpp --- a/hotspot/src/share/vm/runtime/commandLineFlagRangeList.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagRangeList.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -32,6 +32,15 @@ #include "utilities/defaultStream.hpp" #include "utilities/macros.hpp" +void CommandLineError::print(bool verbose, const char* msg, ...) { + if (verbose) { + va_list listPointer; + va_start(listPointer, msg); + jio_vfprintf(defaultStream::error_stream(), msg, listPointer); + va_end(listPointer); + } +} + class CommandLineFlagRange_int : public CommandLineFlagRange { int _min; int _max; @@ -44,11 +53,10 @@ Flag::Error check_int(int value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "int %s=%d is outside the allowed range [ %d ... %d ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "int %s=%d is outside the allowed range " + "[ %d ... %d ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -72,11 +80,10 @@ Flag::Error check_intx(intx value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "intx %s=" INTX_FORMAT " is outside the allowed range [ " INTX_FORMAT " ... 
" INTX_FORMAT " ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "intx %s=" INTX_FORMAT " is outside the allowed range " + "[ " INTX_FORMAT " ... " INTX_FORMAT " ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -100,11 +107,10 @@ Flag::Error check_uint(uint value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "uintx %s=%u is outside the allowed range [ %u ... %u ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "uint %s=%u is outside the allowed range " + "[ %u ... %u ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -128,11 +134,10 @@ Flag::Error check_uintx(uintx value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "uintx %s=" UINTX_FORMAT " is outside the allowed range [ " UINTX_FORMAT " ... " UINTX_FORMAT " ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "uintx %s=" UINTX_FORMAT " is outside the allowed range " + "[ " UINTX_FORMAT " ... " UINTX_FORMAT " ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -156,11 +161,10 @@ Flag::Error check_uint64_t(uint64_t value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "uint64_t %s=" UINT64_FORMAT " is outside the allowed range [ " UINT64_FORMAT " ... " UINT64_FORMAT " ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "uint64_t %s=" UINT64_FORMAT " is outside the allowed range " + "[ " UINT64_FORMAT " ... " UINT64_FORMAT " ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -184,11 +188,10 @@ Flag::Error check_size_t(size_t value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "size_t %s=" SIZE_FORMAT " is outside the allowed range [ " SIZE_FORMAT " ... " SIZE_FORMAT " ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "size_t %s=" SIZE_FORMAT " is outside the allowed range " + "[ " SIZE_FORMAT " ... " SIZE_FORMAT " ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -212,11 +215,10 @@ Flag::Error check_double(double value, bool verbose = true) { if ((value < _min) || (value > _max)) { - if (verbose == true) { - jio_fprintf(defaultStream::error_stream(), - "double %s=%f is outside the allowed range [ %f ... %f ]\n", - name(), value, _min, _max); - } + CommandLineError::print(verbose, + "double %s=%f is outside the allowed range " + "[ %f ... 
%f ]\n", + name(), value, _min, _max); return Flag::OUT_OF_BOUNDS; } else { return Flag::SUCCESS; @@ -300,48 +302,48 @@ EMIT_RANGES_FOR_GLOBALS_EXT emit_range_no(NULL ARCH_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, - EMIT_RANGE_PRODUCT_FLAG, - EMIT_RANGE_DIAGNOSTIC_FLAG, - EMIT_RANGE_EXPERIMENTAL_FLAG, - EMIT_RANGE_NOTPRODUCT_FLAG, - EMIT_RANGE_CHECK, - IGNORE_CONSTRAINT)); + EMIT_RANGE_PRODUCT_FLAG, + EMIT_RANGE_DIAGNOSTIC_FLAG, + EMIT_RANGE_EXPERIMENTAL_FLAG, + EMIT_RANGE_NOTPRODUCT_FLAG, + EMIT_RANGE_CHECK, + IGNORE_CONSTRAINT)); #ifdef COMPILER1 emit_range_no(NULL C1_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, - EMIT_RANGE_PD_DEVELOPER_FLAG, - EMIT_RANGE_PRODUCT_FLAG, - EMIT_RANGE_PD_PRODUCT_FLAG, - EMIT_RANGE_DIAGNOSTIC_FLAG, - EMIT_RANGE_NOTPRODUCT_FLAG, - EMIT_RANGE_CHECK, - IGNORE_CONSTRAINT)); + EMIT_RANGE_PD_DEVELOPER_FLAG, + EMIT_RANGE_PRODUCT_FLAG, + EMIT_RANGE_PD_PRODUCT_FLAG, + EMIT_RANGE_DIAGNOSTIC_FLAG, + EMIT_RANGE_NOTPRODUCT_FLAG, + EMIT_RANGE_CHECK, + IGNORE_CONSTRAINT)); #endif // COMPILER1 #ifdef COMPILER2 emit_range_no(NULL C2_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, - EMIT_RANGE_PD_DEVELOPER_FLAG, - EMIT_RANGE_PRODUCT_FLAG, - EMIT_RANGE_PD_PRODUCT_FLAG, - EMIT_RANGE_DIAGNOSTIC_FLAG, - EMIT_RANGE_EXPERIMENTAL_FLAG, - EMIT_RANGE_NOTPRODUCT_FLAG, - EMIT_RANGE_CHECK, - IGNORE_CONSTRAINT)); + EMIT_RANGE_PD_DEVELOPER_FLAG, + EMIT_RANGE_PRODUCT_FLAG, + EMIT_RANGE_PD_PRODUCT_FLAG, + EMIT_RANGE_DIAGNOSTIC_FLAG, + EMIT_RANGE_EXPERIMENTAL_FLAG, + EMIT_RANGE_NOTPRODUCT_FLAG, + EMIT_RANGE_CHECK, + IGNORE_CONSTRAINT)); #endif // COMPILER2 #if INCLUDE_ALL_GCS emit_range_no(NULL G1_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, - EMIT_RANGE_PD_DEVELOPER_FLAG, - EMIT_RANGE_PRODUCT_FLAG, - EMIT_RANGE_PD_PRODUCT_FLAG, - EMIT_RANGE_DIAGNOSTIC_FLAG, - EMIT_RANGE_EXPERIMENTAL_FLAG, - EMIT_RANGE_NOTPRODUCT_FLAG, - EMIT_RANGE_MANAGEABLE_FLAG, - EMIT_RANGE_PRODUCT_RW_FLAG, - EMIT_RANGE_CHECK, - IGNORE_CONSTRAINT)); + EMIT_RANGE_PD_DEVELOPER_FLAG, + EMIT_RANGE_PRODUCT_FLAG, + EMIT_RANGE_PD_PRODUCT_FLAG, + EMIT_RANGE_DIAGNOSTIC_FLAG, + EMIT_RANGE_EXPERIMENTAL_FLAG, + EMIT_RANGE_NOTPRODUCT_FLAG, + EMIT_RANGE_MANAGEABLE_FLAG, + EMIT_RANGE_PRODUCT_RW_FLAG, + EMIT_RANGE_CHECK, + IGNORE_CONSTRAINT)); #endif // INCLUDE_ALL_GCS } @@ -367,45 +369,23 @@ } bool CommandLineFlagRangeList::check_ranges() { -//#define PRINT_RANGES_SIZES -#ifdef PRINT_RANGES_SIZES - { - size_t size_ranges = sizeof(CommandLineFlagRangeList); - for (int i=0; iname(); - Flag* flag = Flag::find_flag(name, strlen(name), true, true); - if (flag->is_intx()) { - size_ranges += 2*sizeof(intx); - size_ranges += sizeof(CommandLineFlagRange*); - } else if (flag->is_uintx()) { - size_ranges += 2*sizeof(uintx); - size_ranges += sizeof(CommandLineFlagRange*); - } else if (flag->is_uint64_t()) { - size_ranges += 2*sizeof(uint64_t); - size_ranges += sizeof(CommandLineFlagRange*); - } else if (flag->is_size_t()) { - size_ranges += 2*sizeof(size_t); - size_ranges += sizeof(CommandLineFlagRange*); - } else if (flag->is_double()) { - size_ranges += 2*sizeof(double); - size_ranges += sizeof(CommandLineFlagRange*); - } - } - fprintf(stderr, "Size of %d ranges: " SIZE_FORMAT " bytes\n", - length(), size_ranges); - } -#endif // PRINT_RANGES_SIZES - // Check ranges. 
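// Illustrative sketch, not part of the changeset: each CommandLineFlagRange_* subclass above
// carries its own typed [min, max] pair and reports Flag::OUT_OF_BOUNDS when a value falls
// outside it. Checking one flag through the registered list looks like this; the flag name
// "MyIntxFlag" and the local variables are hypothetical:
//
//   intx proposed_value = 42;  // value about to be written
//   CommandLineFlagRange* range = CommandLineFlagRangeList::find("MyIntxFlag");
//   if (range != NULL) {
//     Flag::Error err = range->check_intx(proposed_value, true /* verbose */);
//     // err == Flag::OUT_OF_BOUNDS means the value was rejected and a message was printed
//   }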
bool status = true; for (int i=0; i<length(); i++) { CommandLineFlagRange* range = at(i); const char* name = range->name(); Flag* flag = Flag::find_flag(name, strlen(name), true, true); + // We must check for NULL here as lp64_product flags on 32 bit architecture + // can generate range check (despite that they are declared as constants), + // but they will not be returned by Flag::find_flag() if (flag != NULL) { - if (flag->is_intx()) { + if (flag->is_int()) { + int value = flag->get_int(); + if (range->check_int(value, true) != Flag::SUCCESS) status = false; + } else if (flag->is_uint()) { + uint value = flag->get_uint(); + if (range->check_uint(value, true) != Flag::SUCCESS) status = false; + } else if (flag->is_intx()) { intx value = flag->get_intx(); if (range->check_intx(value, true) != Flag::SUCCESS) status = false; } else if (flag->is_uintx()) { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/commandLineFlagRangeList.hpp --- a/hotspot/src/share/vm/runtime/commandLineFlagRangeList.hpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/commandLineFlagRangeList.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -38,6 +38,11 @@ * then we need to use constraint instead. */ +class CommandLineError : public AllStatic { +public: + static void print(bool verbose, const char* msg, ...); +}; + class CommandLineFlagRange : public CHeapObj<mtInternal> { private: const char* _name; diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/globals.cpp --- a/hotspot/src/share/vm/runtime/globals.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/globals.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -310,13 +310,17 @@ void Flag::get_locked_message(char* buf, int buflen) const { buf[0] = '\0'; if (is_diagnostic() && !is_unlocked()) { - jio_snprintf(buf, buflen, "Error: VM option '%s' is diagnostic and must be enabled via -XX:+UnlockDiagnosticVMOptions.\n", - _name); + jio_snprintf(buf, buflen, + "Error: VM option '%s' is diagnostic and must be enabled via -XX:+UnlockDiagnosticVMOptions.\n" + "Error: The unlock option must precede '%s'.\n", + _name, _name); return; } if (is_experimental() && !is_unlocked()) { - jio_snprintf(buf, buflen, "Error: VM option '%s' is experimental and must be enabled via -XX:+UnlockExperimentalVMOptions.\n", - _name); + jio_snprintf(buf, buflen, + "Error: VM option '%s' is experimental and must be enabled via -XX:+UnlockExperimentalVMOptions.\n" + "Error: The unlock option must precede '%s'.\n", + _name, _name); return; } if (is_develop() && is_product_build()) { @@ -515,6 +519,20 @@ } } +const char* Flag::flag_error_str(Flag::Error error) { + switch (error) { + case Flag::MISSING_NAME: return "MISSING_NAME"; + case Flag::MISSING_VALUE: return "MISSING_VALUE"; + case Flag::NON_WRITABLE: return "NON_WRITABLE"; + case Flag::OUT_OF_BOUNDS: return "OUT_OF_BOUNDS"; + case Flag::VIOLATES_CONSTRAINT: return "VIOLATES_CONSTRAINT"; + case Flag::INVALID_FLAG: return "INVALID_FLAG"; + case Flag::ERR_OTHER: return "ERR_OTHER"; + case Flag::SUCCESS: return "SUCCESS"; + default: ShouldNotReachHere(); return "NULL"; + } +} + // 4991491 do not "optimize out" the was_set false values: omitting them // tickles a Microsoft compiler bug causing flagTable to be malformed @@ -758,17 +776,7 @@ e.commit(); } -static Flag::Error get_status_error(Flag::Error status_range, Flag::Error status_constraint) { - if (status_range != Flag::SUCCESS) { - return status_range; - } else if (status_constraint != Flag::SUCCESS) { - return status_constraint; - } else { - return Flag::SUCCESS; - } -} - -static Flag::Error apply_constraint_and_check_range_bool(const
char* name, bool* new_value, bool verbose = true) { +static Flag::Error apply_constraint_and_check_range_bool(const char* name, bool new_value, bool verbose = true) { Flag::Error status = Flag::SUCCESS; CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); if (constraint != NULL) { @@ -789,7 +797,7 @@ Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_bool()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_bool(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_bool(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; bool old_value = result->get_bool(); trace_flag_changed(name, old_value, *value, origin); @@ -802,7 +810,7 @@ Flag::Error CommandLineFlagsEx::boolAtPut(CommandLineFlagWithType flag, bool value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_bool(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_bool(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_bool(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_bool(), value, origin); faddr->set_bool(value); @@ -810,18 +818,19 @@ return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_int(const char* name, int* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_int(const char* name, int new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_int(*new_value, verbose); + status = range->check_int(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_int(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_int(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::intAt(const char* name, size_t len, int* value, bool allow_locked, bool return_flag) { @@ -836,7 +845,7 @@ Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_int()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_int(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_int(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; int old_value = result->get_int(); trace_flag_changed(name, old_value, *value, origin); @@ -849,24 +858,27 @@ Flag::Error CommandLineFlagsEx::intAtPut(CommandLineFlagWithType flag, int value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_int(), "wrong flag type"); + Flag::Error check = apply_constraint_and_check_range_int(faddr->_name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + if (check != 
Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_int(), value, origin); faddr->set_int(value); faddr->set_origin(origin); return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_uint(const char* name, uint* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_uint(const char* name, uint new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_uint(*new_value, verbose); + status = range->check_uint(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_uint(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_uint(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::uintAt(const char* name, size_t len, uint* value, bool allow_locked, bool return_flag) { @@ -881,7 +893,7 @@ Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_uint()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_uint(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_uint(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; uint old_value = result->get_uint(); trace_flag_changed(name, old_value, *value, origin); @@ -894,6 +906,8 @@ Flag::Error CommandLineFlagsEx::uintAtPut(CommandLineFlagWithType flag, uint value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_uint(), "wrong flag type"); + Flag::Error check = apply_constraint_and_check_range_uint(faddr->_name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_uint(), value, origin); faddr->set_uint(value); faddr->set_origin(origin); @@ -908,25 +922,26 @@ return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_intx(const char* name, intx* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_intx(const char* name, intx new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_intx(*new_value, verbose); + status = range->check_intx(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_intx(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_intx(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error 
CommandLineFlags::intxAtPut(const char* name, size_t len, intx* value, Flag::Flags origin) { Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_intx()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_intx(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_intx(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; intx old_value = result->get_intx(); trace_flag_changed(name, old_value, *value, origin); @@ -939,7 +954,7 @@ Flag::Error CommandLineFlagsEx::intxAtPut(CommandLineFlagWithType flag, intx value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_intx(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_intx(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_intx(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_intx(), value, origin); faddr->set_intx(value); @@ -955,25 +970,26 @@ return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_uintx(const char* name, uintx* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_uintx(const char* name, uintx new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_uintx(*new_value, verbose); + status = range->check_uintx(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_uintx(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_uintx(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::uintxAtPut(const char* name, size_t len, uintx* value, Flag::Flags origin) { Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_uintx()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_uintx(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_uintx(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; uintx old_value = result->get_uintx(); trace_flag_changed(name, old_value, *value, origin); @@ -986,7 +1002,7 @@ Flag::Error CommandLineFlagsEx::uintxAtPut(CommandLineFlagWithType flag, uintx value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_uintx(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_uintx(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_uintx(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_uintx(), value, origin); faddr->set_uintx(value); @@ -1002,25 +1018,26 @@ return Flag::SUCCESS; } -static Flag::Error 
apply_constraint_and_check_range_uint64_t(const char* name, uint64_t* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_uint64_t(const char* name, uint64_t new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_uint64_t(*new_value, verbose); + status = range->check_uint64_t(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_uint64_t(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_uint64_t(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::uint64_tAtPut(const char* name, size_t len, uint64_t* value, Flag::Flags origin) { Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_uint64_t()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_uint64_t(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_uint64_t(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; uint64_t old_value = result->get_uint64_t(); trace_flag_changed(name, old_value, *value, origin); @@ -1033,7 +1050,7 @@ Flag::Error CommandLineFlagsEx::uint64_tAtPut(CommandLineFlagWithType flag, uint64_t value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_uint64_t(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_uint64_t(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_uint64_t(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_uint64_t(), value, origin); faddr->set_uint64_t(value); @@ -1049,25 +1066,26 @@ return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_size_t(const char* name, size_t* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_size_t(const char* name, size_t new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_size_t(*new_value, verbose); + status = range->check_size_t(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_size_t(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_size_t(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::size_tAtPut(const char* name, size_t len, size_t* value, Flag::Flags origin) { Flag* result = 
Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_size_t()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_size_t(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_size_t(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; size_t old_value = result->get_size_t(); trace_flag_changed(name, old_value, *value, origin); @@ -1080,7 +1098,7 @@ Flag::Error CommandLineFlagsEx::size_tAtPut(CommandLineFlagWithType flag, size_t value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_size_t(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_size_t(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_size_t(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_size_t(), value, origin); faddr->set_size_t(value); @@ -1096,25 +1114,26 @@ return Flag::SUCCESS; } -static Flag::Error apply_constraint_and_check_range_double(const char* name, double* new_value, bool verbose = true) { - Flag::Error range_status = Flag::SUCCESS; +static Flag::Error apply_constraint_and_check_range_double(const char* name, double new_value, bool verbose = true) { + Flag::Error status = Flag::SUCCESS; CommandLineFlagRange* range = CommandLineFlagRangeList::find(name); if (range != NULL) { - range_status = range->check_double(*new_value, verbose); + status = range->check_double(new_value, verbose); } - Flag::Error constraint_status = Flag::SUCCESS; - CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); - if (constraint != NULL) { - constraint_status = constraint->apply_double(new_value, verbose); + if (status == Flag::SUCCESS) { + CommandLineFlagConstraint* constraint = CommandLineFlagConstraintList::find_if_needs_check(name); + if (constraint != NULL) { + status = constraint->apply_double(new_value, verbose); + } } - return get_status_error(range_status, constraint_status); + return status; } Flag::Error CommandLineFlags::doubleAtPut(const char* name, size_t len, double* value, Flag::Flags origin) { Flag* result = Flag::find_flag(name, len); if (result == NULL) return Flag::INVALID_FLAG; if (!result->is_double()) return Flag::WRONG_FORMAT; - Flag::Error check = apply_constraint_and_check_range_double(name, value, !CommandLineFlagConstraintList::validated_after_ergo()); + Flag::Error check = apply_constraint_and_check_range_double(name, *value, !CommandLineFlagConstraintList::validated_after_ergo()); if (check != Flag::SUCCESS) return check; double old_value = result->get_double(); trace_flag_changed(name, old_value, *value, origin); @@ -1127,7 +1146,7 @@ Flag::Error CommandLineFlagsEx::doubleAtPut(CommandLineFlagWithType flag, double value, Flag::Flags origin) { Flag* faddr = address_of_flag(flag); guarantee(faddr != NULL && faddr->is_double(), "wrong flag type"); - Flag::Error check = apply_constraint_and_check_range_double(faddr->_name, &value); + Flag::Error check = apply_constraint_and_check_range_double(faddr->_name, value); if (check != Flag::SUCCESS) return check; trace_flag_changed(faddr->_name, faddr->get_double(), value, origin); faddr->set_double(value); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 14:24:41 2015 -0700 +++ 
b/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 16:14:02 2015 -0700 @@ -372,19 +372,7 @@ void print_kind(outputStream* st); void print_as_flag(outputStream* st); - static const char* flag_error_str(Flag::Error error) { - switch (error) { - case Flag::MISSING_NAME: return "MISSING_NAME"; - case Flag::MISSING_VALUE: return "MISSING_VALUE"; - case Flag::NON_WRITABLE: return "NON_WRITABLE"; - case Flag::OUT_OF_BOUNDS: return "OUT_OF_BOUNDS"; - case Flag::VIOLATES_CONSTRAINT: return "VIOLATES_CONSTRAINT"; - case Flag::INVALID_FLAG: return "INVALID_FLAG"; - case Flag::ERR_OTHER: return "ERR_OTHER"; - case Flag::SUCCESS: return "SUCCESS"; - default: return "NULL"; - } - } + static const char* flag_error_str(Flag::Error error); }; // debug flags control various aspects of the VM and are global accessible @@ -1564,6 +1552,10 @@ product(uint, ParallelGCThreads, 0, \ "Number of parallel threads parallel gc will use") \ \ + diagnostic(bool, UseSemaphoreGCThreadsSynchronization, true, \ + "Use semaphore synchronization for the GC Threads, " \ + "instead of synchronization based on mutexes") \ + \ product(bool, UseDynamicNumberOfGCThreads, false, \ "Dynamically choose the number of parallel threads " \ "parallel gc will use") \ @@ -1575,7 +1567,7 @@ product(size_t, HeapSizePerGCThread, ScaleForWordSize(64*M), \ "Size of heap (bytes) per GC thread used in calculating the " \ "number of GC threads") \ - range((uintx)os::vm_page_size(), max_uintx) \ + range((size_t)os::vm_page_size(), (size_t)max_uintx) \ \ product(bool, TraceDynamicGCThreads, false, \ "Trace the dynamic GC thread usage") \ @@ -1856,6 +1848,7 @@ product(size_t, MarkStackSize, NOT_LP64(32*K) LP64_ONLY(4*M), \ "Size of marking stack") \ \ + /* where does the range max value of (max_jint - 1) come from? */ \ product(size_t, MarkStackSizeMax, NOT_LP64(4*M) LP64_ONLY(512*M), \ "Maximum size of marking stack") \ range(1, (max_jint - 1)) \ @@ -2920,12 +2913,6 @@ notproduct(bool, ICMissHistogram, false, \ "Produce histogram of IC misses") \ \ - notproduct(bool, PrintClassStatistics, false, \ - "Print class statistics at end of run") \ - \ - notproduct(bool, PrintMethodStatistics, false, \ - "Print method statistics at end of run") \ - \ /* interpreter */ \ develop(bool, ClearInterpreterLocals, false, \ "Always clear local variables of interpreter activations upon " \ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/java.cpp --- a/hotspot/src/share/vm/runtime/java.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/java.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -304,13 +304,6 @@ CodeCache::print_internals(); } - if (PrintClassStatistics) { - SystemDictionary::print_class_statistics(); - } - if (PrintMethodStatistics) { - SystemDictionary::print_method_statistics(); - } - if (PrintVtableStats) { klassVtable::print_statistics(); klassItable::print_statistics(); diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/stubRoutines.cpp --- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -181,7 +181,7 @@ StubGenerator_generate(&buffer, false); // When new stubs added we need to make sure there is some space left // to catch situation when we should increase size again. 
- assert(buffer.insts_remaining() > 200, "increase code_size1"); + assert(code_size1 == 0 || buffer.insts_remaining() > 200, "increase code_size1"); } } @@ -274,7 +274,7 @@ StubGenerator_generate(&buffer, true); // When new stubs added we need to make sure there is some space left // to catch situation when we should increase size again. - assert(buffer.insts_remaining() > 200, "increase code_size2"); + assert(code_size2 == 0 || buffer.insts_remaining() > 200, "increase code_size2"); } #ifdef ASSERT diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/sweeper.cpp --- a/hotspot/src/share/vm/runtime/sweeper.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/sweeper.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -618,19 +618,14 @@ MutexLocker cl(CompiledIC_lock); nm->clear_ic_stubs(); } - // Acquiring the CompiledIC_lock may block for a safepoint and set the - // nmethod to zombie (see 'CodeCache::make_marked_nmethods_zombies'). - // Check if nmethod is still non-entrant at this point. - if (nm->is_not_entrant()) { - if (PrintMethodFlushing && Verbose) { - tty->print_cr("### Nmethod %3d/" PTR_FORMAT " (not entrant) being made zombie", nm->compile_id(), nm); - } - // Code cache state change is tracked in make_zombie() - nm->make_zombie(); - SWEEP(nm); - assert(result == None, "sanity"); - result = MadeZombie; + if (PrintMethodFlushing && Verbose) { + tty->print_cr("### Nmethod %3d/" PTR_FORMAT " (not entrant) being made zombie", nm->compile_id(), nm); } + // Code cache state change is tracked in make_zombie() + nm->make_zombie(); + SWEEP(nm); + assert(result == None, "sanity"); + result = MadeZombie; assert(nm->is_zombie(), "nmethod must be zombie"); } else { // Still alive, clean up its inline caches diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/thread.cpp --- a/hotspot/src/share/vm/runtime/thread.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/thread.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -3331,7 +3331,6 @@ // Final check of all 'AfterErgo' constraints after ergonomics which may change values. 
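// Illustrative sketch, not part of the changeset: the constraint machinery is wired in at two
// points. Every typed *AtPut helper in globals.cpp (above) validates the proposed value before
// storing it, passing !CommandLineFlagConstraintList::validated_after_ergo() as the verbose
// argument, and the VM startup path in thread.cpp (below) runs one last
// CommandLineFlagConstraint::AfterErgo pass once ergonomics has adjusted dependent flags,
// returning JNI_EINVAL if any constraint is still violated. A hypothetical caller reacting to
// a violation could look like this (the call site and values are illustrative only):
//
//   uintx new_ratio = 120;
//   Flag::Error err = CommandLineFlags::uintxAtPut("MinHeapFreeRatio",
//                                                  strlen("MinHeapFreeRatio"),
//                                                  &new_ratio, Flag::COMMAND_LINE);
//   if (err != Flag::SUCCESS) {
//     // e.g. Flag::flag_error_str(err) yields "VIOLATES_CONSTRAINT"
//   }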
bool constraint_result = CommandLineFlagConstraintList::check_constraints(CommandLineFlagConstraint::AfterErgo); - Arguments::post_after_ergo_constraint_check(constraint_result); if (!constraint_result) { return JNI_EINVAL; } diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/vmStructs.cpp --- a/hotspot/src/share/vm/runtime/vmStructs.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -405,7 +405,7 @@ nonstatic_field(ObjArrayKlass, _element_klass, Klass*) \ nonstatic_field(ObjArrayKlass, _bottom_klass, Klass*) \ volatile_nonstatic_field(Symbol, _refcount, short) \ - nonstatic_field(Symbol, _identity_hash, int) \ + nonstatic_field(Symbol, _identity_hash, short) \ nonstatic_field(Symbol, _length, unsigned short) \ unchecked_nonstatic_field(Symbol, _body, sizeof(jbyte)) /* NOTE: no type */ \ nonstatic_field(TypeArrayKlass, _max_length, int) \ @@ -1565,6 +1565,7 @@ declare_toplevel_type(Generation*) \ declare_toplevel_type(GenerationSpec**) \ declare_toplevel_type(HeapWord*) \ + declare_toplevel_type(HeapWord* volatile) \ declare_toplevel_type(MemRegion*) \ declare_toplevel_type(OffsetTableContigSpace*) \ declare_toplevel_type(Space*) \ diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/runtime/vm_operations.cpp --- a/hotspot/src/share/vm/runtime/vm_operations.cpp Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/runtime/vm_operations.cpp Thu Sep 03 16:14:02 2015 -0700 @@ -109,8 +109,8 @@ // Deoptimize all activations depending on marked nmethods Deoptimization::deoptimize_dependents(); - // Make the dependent methods zombies - CodeCache::make_marked_nmethods_zombies(); + // Make the dependent methods not entrant + CodeCache::make_marked_nmethods_not_entrant(); } void VM_MarkActiveNMethods::doit() { diff -r eb1661ea942c -r 6675700073c1 hotspot/src/share/vm/trace/trace.xml --- a/hotspot/src/share/vm/trace/trace.xml Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/src/share/vm/trace/trace.xml Thu Sep 03 16:14:02 2015 -0700 @@ -346,6 +346,29 @@ + + + + + + + + + + + + + + + + + + + + + = dst_off && i < dst_off + len) { + if (dst[i] != src[i - dst_off + src_off]) { + return false; + } + } else { + if (dst[i] != 0x42-i) { + return false; + } + } + } + } + return true; + } + + // Array copy with Phi + static boolean m5(int[] src, boolean flag1, boolean flag2) { + int[] dst = new int[10]; + if (flag1) { + System.arraycopy(src, 0, dst, 0, 10); + } + if (flag2) { + for (int i = 0; i < dst.length; i++) { + if (dst[i] != src[i]) { + return false; + } + } + } + return true; + } + + static public void main(String[] args) throws Exception { + boolean success = true; + A a = new A(); + a.f0 = 0x42; + for (int i = 0; i < 20000; i++) { + m1(a, false); + } + if (!m1(a, true)) { + System.out.println("m1 failed"); + success = false; + } + + for (int i = 0; i < 20000; i++) { + m2(false); + } + if (!m2(true)) { + System.out.println("m2 failed"); + success = false; + } + + int[] src = new int[10]; + for (int i = 0; i < src.length; i++) { + src[i] = 0x42+i; + } + + for (int i = 0; i < 20000; i++) { + m3(src, false); + } + if (!m3(src, true)) { + System.out.println("m3 failed"); + success = false; + } + + for (int i = 0; i < 20000; i++) { + m4(src, false); + } + if (!m4(src, true)) { + System.out.println("m4 failed"); + success = false; + } + + for (int i = 0; i < 20000; i++) { + m5(src, i%2 == 0, false); + } + if (!m5(src, true, true)) { + System.out.println("m4 failed"); + success = false; + } + + if (!success) { + throw new 
RuntimeException("Test failed"); + } + } +} diff -r eb1661ea942c -r 6675700073c1 hotspot/test/compiler/arraycopy/TestEliminatedArrayCopyPhi.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/arraycopy/TestEliminatedArrayCopyPhi.java Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8134321 + * @summary Code that capture field values of eliminated allocation at a safepoint when there's an arraycopy behind a Phi is broken + * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestEliminatedArrayCopyPhi + * + */ + +public class TestEliminatedArrayCopyPhi { + + static int[] escaped; + + static void test(int[] src, boolean flag1, boolean flag2) { + int[] array = new int[10]; + if (flag1) { + System.arraycopy(src, 0, array, 0, src.length); + } else { + } + + if (flag2) { + // never taken + escaped = array; + } + } + + public static void main(String[] args) { + int[] src = new int[10]; + for (int i = 0; i < 20000; i++) { + test(src, (i % 2) == 0, false); + } + } +} diff -r eb1661ea942c -r 6675700073c1 hotspot/test/compiler/floatingpoint/NaNTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/floatingpoint/NaNTest.java Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +/** + * @test + * @bug 8076373 + * @summary Verify if signaling NaNs are preserved. 
+ * @run main NaNTest + */ +public class NaNTest { + static void testFloat() { + int originalValue = 0x7f800001; + int readBackValue = Float.floatToRawIntBits(Float.intBitsToFloat(originalValue)); + if (originalValue != readBackValue) { + String errorMessage = String.format("Original and read back float values mismatch\n0x%X 0x%X\n", + originalValue, + readBackValue); + throw new RuntimeException(errorMessage); + } else { + System.out.printf("Written and read back float values match\n0x%X 0x%X\n", + originalValue, + readBackValue); + } + } + + static void testDouble() { + long originalValue = 0xFFF0000000000001L; + long readBackValue = Double.doubleToRawLongBits(Double.longBitsToDouble(originalValue)); + if (originalValue != readBackValue) { + String errorMessage = String.format("Original and read back double values mismatch\n0x%X 0x%X\n", + originalValue, + readBackValue); + throw new RuntimeException(errorMessage); + } else { + System.out.printf("Written and read back double values match\n0x%X 0x%X\n", + originalValue, + readBackValue); + } + + } + + public static void main(String args[]) { + System.out.println("### NanTest started"); + + testFloat(); + testDouble(); + + System.out.println("### NanTest ended"); + } +} diff -r eb1661ea942c -r 6675700073c1 hotspot/test/compiler/loopopts/TestMoveStoresOutOfLoops.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/loopopts/TestMoveStoresOutOfLoops.java Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 8080289 + * @summary Sink stores out of loops if possible + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+PrintCompilation -XX:CompileCommand=dontinline,TestMoveStoresOutOfLoops::test* TestMoveStoresOutOfLoops + * + */ + +import java.lang.reflect.*; +import java.util.*; +import java.util.function.*; + +public class TestMoveStoresOutOfLoops { + + private static long[] array = new long[10]; + private static long[] array2 = new long[10]; + private static boolean[] array3 = new boolean[1000]; + private static byte[] byte_array = new byte[10]; + + // Array store should be moved out of the loop, value stored + // should be 999, the loop should be eliminated + static void test_after_1(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + } + } + + // Array store can't be moved out of loop because of following + // non loop invariant array access + static void test_after_2(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + array2[i%10] = i; + } + } + + // Array store can't be moved out of loop because of following + // use + static void test_after_3(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + if (array[0] == -1) { + break; + } + } + } + + // Array store can't be moved out of loop because of preceding + // use + static void test_after_4(int idx) { + for (int i = 0; i < 1000; i++) { + if (array[0] == -2) { + break; + } + array[idx] = i; + } + } + + // All array stores should be moved out of the loop, one after + // the other + static void test_after_5(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + array[idx+1] = i; + array[idx+2] = i; + array[idx+3] = i; + array[idx+4] = i; + array[idx+5] = i; + } + } + + // Array store can be moved after the loop but needs to be + // cloned on both exit paths + static void test_after_6(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + if (array3[i]) { + return; + } + } + } + + // Optimize out redundant stores + static void test_stores_1(int ignored) { + array[0] = 0; + array[1] = 1; + array[2] = 2; + array[0] = 0; + array[1] = 1; + array[2] = 2; + } + + static void test_stores_2(int idx) { + array[idx+0] = 0; + array[idx+1] = 1; + array[idx+2] = 2; + array[idx+0] = 0; + array[idx+1] = 1; + array[idx+2] = 2; + } + + static void test_stores_3(int idx) { + byte_array[idx+0] = 0; + byte_array[idx+1] = 1; + byte_array[idx+2] = 2; + byte_array[idx+0] = 0; + byte_array[idx+1] = 1; + byte_array[idx+2] = 2; + } + + // Array store can be moved out of the loop before the loop header + static void test_before_1(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = 999; + } + } + + // Array store can't be moved out of the loop before the loop + // header because there's more than one store on this slice + static void test_before_2(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = 999; + array[i%2] = 0; + } + } + + // Array store can't be moved out of the loop before the loop + // header because of use before store + static int test_before_3(int idx) { + int res = 0; + for (int i = 0; i < 1000; i++) { + res += array[i%10]; + array[idx] = 999; + } + return res; + } + + // Array store can't be moved out of the loop before the loop + // header because of possible early exit + static void test_before_4(int idx) { + for (int i = 0; i < 1000; i++) { + if (idx / (i+1) > 0) { + return; + } + array[idx] = 999; + } + } + + // Array store can't be moved out of the loop before the loop + // header because it doesn't 
postdominate the loop head + static void test_before_5(int idx) { + for (int i = 0; i < 1000; i++) { + if (i % 2 == 0) { + array[idx] = 999; + } + } + } + + // Array store can be moved out of the loop before the loop header + static int test_before_6(int idx) { + int res = 0; + for (int i = 0; i < 1000; i++) { + if (i%2 == 1) { + res *= 2; + } else { + res++; + } + array[idx] = 999; + } + return res; + } + + final HashMap<String, Method> tests = new HashMap<>(); + { + for (Method m : this.getClass().getDeclaredMethods()) { + if (m.getName().matches("test_(before|after|stores)_[0-9]+")) { + assert(Modifier.isStatic(m.getModifiers())) : m; + tests.put(m.getName(), m); + } + } + } + + boolean success = true; + void doTest(String name, Runnable init, Function<String, Boolean> check) throws Exception { + Method m = tests.get(name); + for (int i = 0; i < 20000; i++) { + init.run(); + m.invoke(null, 0); + success = success && check.apply(name); + if (!success) { + break; + } + } + } + + static void array_init() { + array[0] = -1; + } + + static boolean array_check(String name) { + boolean success = true; + if (array[0] != 999) { + success = false; + System.out.println(name + " failed: array[0] = " + array[0]); + } + return success; + } + + static void array_init2() { + for (int i = 0; i < 6; i++) { + array[i] = -1; + } + } + + static boolean array_check2(String name) { + boolean success = true; + for (int i = 0; i < 6; i++) { + if (array[i] != 999) { + success = false; + System.out.println(name + " failed: array[" + i + "] = " + array[i]); + } + } + return success; + } + + static void array_init3() { + for (int i = 0; i < 3; i++) { + array[i] = -1; + } + } + + static boolean array_check3(String name) { + boolean success = true; + for (int i = 0; i < 3; i++) { + if (array[i] != i) { + success = false; + System.out.println(name + " failed: array[" + i + "] = " + array[i]); + } + } + return success; + } + + static void array_init4() { + for (int i = 0; i < 3; i++) { + byte_array[i] = -1; + } + } + + static boolean array_check4(String name) { + boolean success = true; + for (int i = 0; i < 3; i++) { + if (byte_array[i] != i) { + success = false; + System.out.println(name + " failed: byte_array[" + i + "] = " + byte_array[i]); + } + } + return success; + } + + static public void main(String[] args) throws Exception { + TestMoveStoresOutOfLoops test = new TestMoveStoresOutOfLoops(); + test.doTest("test_after_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_5", TestMoveStoresOutOfLoops::array_init2, TestMoveStoresOutOfLoops::array_check2); + test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + array3[999] = true; + test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + + test.doTest("test_stores_1", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3); + test.doTest("test_stores_2", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3); + test.doTest("test_stores_3", TestMoveStoresOutOfLoops::array_init4, TestMoveStoresOutOfLoops::array_check4); + + test.doTest("test_before_1",
TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_5", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + + if (!test.success) { + throw new RuntimeException("Some tests failed"); + } + } +} diff -r eb1661ea942c -r 6675700073c1 hotspot/test/compiler/regalloc/TestVectorRegAlloc.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/regalloc/TestVectorRegAlloc.java Thu Sep 03 16:14:02 2015 -0700 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8131969 + * @summary assert in register allocation code when vector Phi for a loop is processed because code assumes all inputs already processed + * @run main/othervm -Xbatch TestVectorRegAlloc + * + */ + +public class TestVectorRegAlloc { + + static int test_helper_i; + static boolean test_helper() { + test_helper_i++; + return (test_helper_i & 7) != 0; + } + + static void test(double[] src, double[] dst, boolean flag) { + double j = 0.0; + while(test_helper()) { + for (int i = 0; i < src.length; i++) { + dst[i] = src[i] + j; + } + // Loop will be unswitched and ReplicateD of zero will be + // split through the Phi of outer loop + for (int i = 0; i < src.length; i++) { + double k; + if (flag) { + k = j; + } else { + k = 0; + } + dst[i] = src[i] + k; + } + j++; + } + } + + static public void main(String[] args) { + double[] src = new double[10]; + double[] dst = new double[10]; + for (int i = 0; i < 20000; i++) { + test(src, dst, (i % 2) == 0); + } + } +} diff -r eb1661ea942c -r 6675700073c1 hotspot/test/gc/survivorAlignment/TestPromotionFromSurvivorToTenuredAfterMinorGC.java --- a/hotspot/test/gc/survivorAlignment/TestPromotionFromSurvivorToTenuredAfterMinorGC.java Thu Sep 03 14:24:41 2015 -0700 +++ b/hotspot/test/gc/survivorAlignment/TestPromotionFromSurvivorToTenuredAfterMinorGC.java Thu Sep 03 16:14:02 2015 -0700 @@ -28,7 +28,6 @@ * when their age exceeded tenuring threshold are not aligned to * SurvivorAlignmentInBytes value. 
* @library /testlibrary /../../test/lib - * @ignore 8130308 * @modules java.base/sun.misc * java.management * @build TestPromotionFromSurvivorToTenuredAfterMinorGC @@ -99,11 +98,18 @@ .getActualMemoryUsage(); test.allocate(); - for (int i = 0; i <= SurvivorAlignmentTestMain.MAX_TENURING_THRESHOLD; - i++) { + for (int i = 0; i <= SurvivorAlignmentTestMain.MAX_TENURING_THRESHOLD; i++) { SurvivorAlignmentTestMain.WHITE_BOX.youngGC(); } + // Sometimes we see that data unrelated to the test has been allocated during + // the loop. This data is included in the expectedMemoryUsage since we look + // through all threads to see what they allocated. If this data is still in + // the survivor area however, it should not be included in expectedMemoryUsage + // since the verification below only looks at what's in tenured space. + expectedMemoryUsage -= SurvivorAlignmentTestMain.getAlignmentHelper( + SurvivorAlignmentTestMain.HeapSpace.SURVIVOR) + .getActualMemoryUsage(); test.verifyMemoryUsage(expectedMemoryUsage); } }