hotspot/src/cpu/aarch64/vm/aarch64.ad
changeset 29969 c59f96b13bc7
parent 29586 889895365eb9
child 30025 d148e1b2fac2
29963:ac3f5a39d4ff 29969:c59f96b13bc7
   791     // count one adr and one far branch instruction
   791     // count one adr and one far branch instruction
   792     return 4 * NativeInstruction::instruction_size;
   792     return 4 * NativeInstruction::instruction_size;
   793   }
   793   }
   794 };
   794 };
   795 
   795 
   796   bool preceded_by_ordered_load(const Node *barrier);
   796   // graph traversal helpers
       
   797   MemBarNode *has_parent_membar(const Node *n,
       
   798 				ProjNode *&ctl, ProjNode *&mem);
       
   799   MemBarNode *has_child_membar(const MemBarNode *n,
       
   800 			       ProjNode *&ctl, ProjNode *&mem);
       
   801 
       
   802   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
       
   803   bool unnecessary_acquire(const Node *barrier);
       
   804   bool needs_acquiring_load(const Node *load);
       
   805 
       
   806   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
       
   807   bool unnecessary_release(const Node *barrier);
       
   808   bool unnecessary_volatile(const Node *barrier);
       
   809   bool needs_releasing_store(const Node *store);
   797 
   810 
   798   // Use barrier instructions rather than load acquire / store
   811   // Use barrier instructions rather than load acquire / store
   799   // release.
   812   // release.
   800   const bool UseBarriersForVolatile = true;
   813   const bool UseBarriersForVolatile = false;
       
   814   // Use barrier instructions for unsafe volatile gets rather than
       
   815   // trying to identify an exact signature for them
       
   816   const bool UseBarriersForUnsafeVolatileGet = false;
   801 %}
   817 %}
   802 
   818 
   803 source %{
   819 source %{
   804 
   820 
   805   // AArch64 has load acquire and store release instructions which we
   821   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
   806   // use for ordered memory accesses, e.g. for volatiles.  The ideal
   822   // use to implement volatile reads and writes. For a volatile read
   807   // graph generator also inserts memory barriers around volatile
   823   // we simply need
   808   // accesses, and we don't want to generate both barriers and acq/rel
   824   //
   809   // instructions.  So, when we emit a MemBarAcquire we look back in
   825   //   ldar<x>
   810   // the ideal graph for an ordered load and only emit the barrier if
   826   //
   811   // we don't find one.
   827   // and for a volatile write we need
   812 
   828   //
   813 bool preceded_by_ordered_load(const Node *barrier) {
   829   //   stlr<x>
       
   830   // 
       
   831   // Alternatively, we can implement them by pairing a normal
       
   832   // load/store with a memory barrier. For a volatile read we need
       
   833   // 
       
   834   //   ldr<x>
       
   835   //   dmb ishld
       
   836   //
       
   837   // for a volatile write
       
   838   //
       
   839   //   dmb ish
       
   840   //   str<x>
       
   841   //   dmb ish
       
   842   //
       
   843   // In order to generate the desired instruction sequence we need to
       
   844   // be able to identify specific 'signature' ideal graph node
       
    845   // sequences which i) occur as a translation of volatile reads or
       
   846   // writes and ii) do not occur through any other translation or
       
    847   // graph transformation. We can then provide alternative adlc
       
   848   // matching rules which translate these node sequences to the
       
   849   // desired machine code sequences. Selection of the alternative
       
   850   // rules can be implemented by predicates which identify the
       
   851   // relevant node sequences.
       
   852   //
       
   853   // The ideal graph generator translates a volatile read to the node
       
   854   // sequence
       
   855   //
       
   856   //   LoadX[mo_acquire]
       
   857   //   MemBarAcquire
       
   858   //
       
   859   // As a special case when using the compressed oops optimization we
       
   860   // may also see this variant
       
   861   //
       
   862   //   LoadN[mo_acquire]
       
   863   //   DecodeN
       
   864   //   MemBarAcquire
       
   865   //
       
   866   // A volatile write is translated to the node sequence
       
   867   //
       
   868   //   MemBarRelease
       
   869   //   StoreX[mo_release]
       
   870   //   MemBarVolatile
       
   871   //
       
   872   // n.b. the above node patterns are generated with a strict
       
   873   // 'signature' configuration of input and output dependencies (see
       
   874   // the predicates below for exact details). The two signatures are
       
   875   // unique to translated volatile reads/stores -- they will not
       
   876   // appear as a result of any other bytecode translation or inlining
       
   877   // nor as a consequence of optimizing transforms.
       
   878   //
       
   879   // We also want to catch inlined unsafe volatile gets and puts and
       
   880   // be able to implement them using either ldar<x>/stlr<x> or some
       
    881   // combination of ldr<x>/str<x> and dmb instructions.
       
   882   //
       
    883   // Inlined unsafe volatile puts manifest as a minor variant of the
       
   884   // normal volatile put node sequence containing an extra cpuorder
       
   885   // membar
       
   886   //
       
   887   //   MemBarRelease
       
   888   //   MemBarCPUOrder
       
   889   //   StoreX[mo_release]
       
   890   //   MemBarVolatile
       
   891   //
       
   892   // n.b. as an aside, the cpuorder membar is not itself subject to
       
   893   // matching and translation by adlc rules.  However, the rule
       
   894   // predicates need to detect its presence in order to correctly
       
   895   // select the desired adlc rules.
       
   896   //
       
    897   // Inlined unsafe volatile gets manifest as a somewhat different
       
   898   // node sequence to a normal volatile get
       
   899   //
       
   900   //   MemBarCPUOrder
       
   901   //        ||       \\
       
   902   //   MemBarAcquire LoadX[mo_acquire]
       
   903   //        ||
       
   904   //   MemBarCPUOrder
       
   905   //
       
   906   // In this case the acquire membar does not directly depend on the
       
   907   // load. However, we can be sure that the load is generated from an
       
   908   // inlined unsafe volatile get if we see it dependent on this unique
       
   909   // sequence of membar nodes. Similarly, given an acquire membar we
       
   910   // can know that it was added because of an inlined unsafe volatile
       
   911   // get if it is fed and feeds a cpuorder membar and if its feed
       
   912   // membar also feeds an acquiring load.
       
   913   //
       
   914   // So, where we can identify these volatile read and write
       
   915   // signatures we can choose to plant either of the above two code
       
   916   // sequences. For a volatile read we can simply plant a normal
       
   917   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
       
   918   // also choose to inhibit translation of the MemBarAcquire and
       
   919   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
       
   920   //
       
   921   // When we recognise a volatile store signature we can choose to
       
    922   // plant a dmb ish as a translation for the MemBarRelease, a
       
   923   // normal str<x> and then a dmb ish for the MemBarVolatile.
       
   924   // Alternatively, we can inhibit translation of the MemBarRelease
       
   925   // and MemBarVolatile and instead plant a simple stlr<x>
       
   926   // instruction.
       
   927   //
       
   928   // Of course, the above only applies when we see these signature
       
   929   // configurations. We still want to plant dmb instructions in any
       
   930   // other cases where we may see a MemBarAcquire, MemBarRelease or
       
   931   // MemBarVolatile. For example, at the end of a constructor which
       
   932   // writes final/volatile fields we will see a MemBarRelease
       
   933   // instruction and this needs a 'dmb ish' lest we risk the
       
   934   // constructed object being visible without making the
       
   935   // final/volatile field writes visible.
       
   936   //
       
   937   // n.b. the translation rules below which rely on detection of the
       
   938   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
       
   939   // If we see anything other than the signature configurations we
       
    940   // always just translate the loads and stores to ldr<x> and str<x>
       
   941   // and translate acquire, release and volatile membars to the
       
   942   // relevant dmb instructions.
       
   943   //
       
    944   // n.b.b. as a case in point for the above comment, the current
       
   945   // predicates don't detect the precise signature for certain types
       
   946   // of volatile object stores (where the heap_base input type is not
       
   947   // known at compile-time to be non-NULL). In those cases the
       
   948   // MemBarRelease and MemBarVolatile bracket an if-then-else sequence
       
   949   // with a store in each branch (we need a different store depending
       
   950   // on whether heap_base is actually NULL). In such a case we will
       
   951   // just plant a dmb both before and after the branch/merge. The
       
   952   // predicate could (and probably should) be fixed later to also
       
   953   // detect this case.
       
   954 
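To make the selection strategy concrete, an ldar<x>-based load rule keyed off these predicates might look roughly like the sketch below. This is only an illustration: the rule name, the aarch64_enc_ldar encoding and the cost are assumptions rather than part of this changeset, and a real rule would need a base-register-only memory operand because ldar<x> does not accept an offset; the overall shape simply mirrors the loadL rule shown later, with the predicate sense inverted.

  // sketch only: acquiring variant of the long load, selected when the
  // volatile read signature is matched (name and encoding are hypothetical)
  instruct loadL_acq(iRegLNoSp dst, memory mem)
  %{
    match(Set dst (LoadL mem));
    predicate(needs_acquiring_load(n));

    ins_cost(4 * INSN_COST);
    format %{ "ldar  $dst, $mem\t# long" %}

    // hypothetical encoding which emits ldar rather than ldr
    ins_encode(aarch64_enc_ldar(dst, mem));
  %}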
       
   955   // graph traversal helpers
       
   956 
       
    957   // if node n is linked to a parent MemBarNode by intervening
       
    958   // Control and Memory ProjNodes return the MemBarNode, otherwise return
       
   959   // NULL.
       
   960   //
       
   961   // n may only be a Load or a MemBar.
       
   962   //
       
   963   // The ProjNode* references c and m are used to return the relevant
       
   964   // nodes.
       
   965 
       
   966   MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m)
       
   967   {
       
   968     Node *ctl = NULL;
       
   969     Node *mem = NULL;
       
   970     Node *membar = NULL;
       
   971 
       
   972     if (n->is_Load()) {
       
   973       ctl = n->lookup(LoadNode::Control);
       
   974       mem = n->lookup(LoadNode::Memory);
       
   975     } else if (n->is_MemBar()) {
       
   976       ctl = n->lookup(TypeFunc::Control);
       
   977       mem = n->lookup(TypeFunc::Memory);
       
   978     } else {
       
   979 	return NULL;
       
   980     }
       
   981 
       
   982     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
       
   983       return NULL;
       
   984 
       
   985     c = ctl->as_Proj();
       
   986 
       
   987     membar = ctl->lookup(0);
       
   988 
       
   989     if (!membar || !membar->is_MemBar())
       
   990       return NULL;
       
   991 
       
   992     m = mem->as_Proj();
       
   993 
       
   994     if (mem->lookup(0) != membar)
       
   995       return NULL;
       
   996 
       
   997     return membar->as_MemBar();
       
   998   }
       
   999 
       
  1000   // if n is linked to a child MemBarNode by intervening Control and
       
  1001   // Memory ProjNodes return the MemBarNode otherwise return NULL.
       
  1002   //
       
   1003   // As with has_parent_membar, the ProjNode *& references c and m

   1004   // are used to return the relevant Control and Memory projection

   1005   // nodes.
       
  1006 
       
  1007   MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m)
       
  1008   {
       
  1009     ProjNode *ctl = n->proj_out(TypeFunc::Control);
       
  1010     ProjNode *mem = n->proj_out(TypeFunc::Memory);
       
  1011 
       
  1012     // MemBar needs to have both a Ctl and Mem projection
       
  1013     if (! ctl || ! mem)
       
  1014       return NULL;
       
  1015 
       
  1016     c = ctl;
       
  1017     m = mem;
       
  1018 
       
  1019     MemBarNode *child = NULL;
       
  1020     Node *x;
       
  1021 
       
  1022     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
       
  1023       x = ctl->fast_out(i);
       
  1024       // if we see a membar we keep hold of it. we may also see a new
       
  1025       // arena copy of the original but it will appear later
       
  1026       if (x->is_MemBar()) {
       
  1027 	  child = x->as_MemBar();
       
  1028 	  break;
       
  1029       }
       
  1030     }
       
  1031 
       
  1032     if (child == NULL)
       
  1033       return NULL;
       
  1034 
       
  1035     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       
  1036       x = mem->fast_out(i);
       
   1037       // check whether the membar we found via the Ctl projection

   1038       // also appears among the outputs of the Mem projection
       
  1039       if (x == child) {
       
  1040 	return child;
       
  1041       }
       
  1042     }
       
  1043     return NULL;
       
  1044   }
       
  1045 
       
  1046   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
       
  1047 
       
  1048 bool unnecessary_acquire(const Node *barrier) {
       
  1049   // assert barrier->is_MemBar();
       
  1050   if (UseBarriersForVolatile)
       
  1051     // we need to plant a dmb
       
  1052     return false;
       
  1053 
       
  1054   // a volatile read derived from bytecode (or also from an inlined
       
  1055   // SHA field read via LibraryCallKit::load_field_from_object)
       
  1056   // manifests as a LoadX[mo_acquire] followed by an acquire membar
       
   1057   // with a bogus read dependency on its preceding load. so in those
       
  1058   // cases we will find the load node at the PARMS offset of the
       
  1059   // acquire membar.  n.b. there may be an intervening DecodeN node.
       
  1060   //
       
  1061   // a volatile load derived from an inlined unsafe field access
       
  1062   // manifests as a cpuorder membar with Ctl and Mem projections
       
  1063   // feeding both an acquire membar and a LoadX[mo_acquire]. The
       
  1064   // acquire then feeds another cpuorder membar via Ctl and Mem
       
  1065   // projections. The load has no output dependency on these trailing
       
  1066   // membars because subsequent nodes inserted into the graph take
       
  1067   // their control feed from the final membar cpuorder meaning they
       
  1068   // are all ordered after the load.
       
  1069 
   814   Node *x = barrier->lookup(TypeFunc::Parms);
  1070   Node *x = barrier->lookup(TypeFunc::Parms);
   815 
  1071   if (x) {
   816   if (! x)
  1072     // we are starting from an acquire and it has a fake dependency
       
  1073     //
       
  1074     // need to check for
       
  1075     //
       
  1076     //   LoadX[mo_acquire]
       
  1077     //   {  |1   }
       
  1078     //   {DecodeN}
       
  1079     //      |Parms
       
  1080     //   MemBarAcquire*
       
  1081     //
       
  1082     // where * tags node we were passed
       
  1083     // and |k means input k
       
  1084     if (x->is_DecodeNarrowPtr())
       
  1085       x = x->in(1);
       
  1086 
       
  1087     return (x->is_Load() && x->as_Load()->is_acquire());
       
  1088   }
       
  1089   
       
  1090   // only continue if we want to try to match unsafe volatile gets
       
  1091   if (UseBarriersForUnsafeVolatileGet)
   817     return false;
  1092     return false;
   818 
  1093 
   819   if (x->is_DecodeNarrowPtr())
  1094   // need to check for
   820     x = x->in(1);
  1095   //
   821 
  1096   //     MemBarCPUOrder
   822   if (x->is_Load())
  1097   //        ||       \\
   823     return ! x->as_Load()->is_unordered();
  1098   //   MemBarAcquire* LoadX[mo_acquire]
   824 
  1099   //        ||
   825   return false;
  1100   //   MemBarCPUOrder
       
  1101   //
       
  1102   // where * tags node we were passed
       
  1103   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
       
  1104 
       
  1105   // check for a parent MemBarCPUOrder
       
  1106   ProjNode *ctl;
       
  1107   ProjNode *mem;
       
  1108   MemBarNode *parent = has_parent_membar(barrier, ctl, mem);
       
  1109   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
       
  1110     return false;
       
  1111   // ensure the proj nodes both feed a LoadX[mo_acquire]
       
  1112   LoadNode *ld = NULL;
       
  1113   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
       
  1114     x = ctl->fast_out(i);
       
  1115     // if we see a load we keep hold of it and stop searching
       
  1116     if (x->is_Load()) {
       
  1117       ld = x->as_Load();
       
  1118       break;
       
  1119     }
       
  1120   }
       
  1121   // it must be an acquiring load
       
  1122   if (! ld || ! ld->is_acquire())
       
  1123     return false;
       
  1124   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       
  1125     x = mem->fast_out(i);
       
  1126     // if we see the same load we drop it and stop searching
       
  1127     if (x == ld) {
       
  1128       ld = NULL;
       
  1129       break;
       
  1130     }
       
  1131   }
       
  1132   // we must have dropped the load
       
  1133   if (ld)
       
  1134     return false;
       
  1135   // check for a child cpuorder membar
       
  1136   MemBarNode *child  = has_child_membar(barrier->as_MemBar(), ctl, mem);
       
  1137   if (!child || child->Opcode() != Op_MemBarCPUOrder)
       
  1138     return false;
       
  1139 
       
  1140   return true;
   826 }
  1141 }
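The MemBarAcquire itself would then be matched by a rule predicated on unnecessary_acquire(n), so that no dmb is emitted when the ldar<x> form has been chosen for the load. Again a rough sketch, with a hypothetical rule name and a deliberately empty encoding:

  // sketch only: elide the acquire membar when the load already supplies
  // acquire semantics via ldar<x>
  instruct membar_acquire_elided()
  %{
    match(MemBarAcquire);
    predicate(unnecessary_acquire(n));

    ins_cost(0);
    format %{ "membar_acquire (elided)" %}

    ins_encode %{
      // nothing to emit; the preceding ldar<x> already orders the access
    %}
  %}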
       
  1142 
       
  1143 bool needs_acquiring_load(const Node *n)
       
  1144 {
       
  1145   // assert n->is_Load();
       
  1146   if (UseBarriersForVolatile)
       
  1147     // we use a normal load and a dmb
       
  1148     return false;
       
  1149 
       
  1150   LoadNode *ld = n->as_Load();
       
  1151 
       
  1152   if (!ld->is_acquire())
       
  1153     return false;
       
  1154 
       
  1155   // check if this load is feeding an acquire membar
       
  1156   //
       
  1157   //   LoadX[mo_acquire]
       
  1158   //   {  |1   }
       
  1159   //   {DecodeN}
       
  1160   //      |Parms
       
  1161   //   MemBarAcquire*
       
  1162   //
       
  1163   // where * tags node we were passed
       
  1164   // and |k means input k
       
  1165 
       
  1166   Node *start = ld;
       
  1167   Node *mbacq = NULL;
       
  1168 
       
  1169   // if we hit a DecodeNarrowPtr we reset the start node and restart
       
  1170   // the search through the outputs
       
  1171  restart:
       
  1172 
       
  1173   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
       
  1174     Node *x = start->fast_out(i);
       
  1175     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
       
  1176       mbacq = x;
       
  1177     } else if (!mbacq &&
       
  1178 	       (x->is_DecodeNarrowPtr() ||
       
  1179 		(x->is_Mach() && x->Opcode() == Op_DecodeN))) {
       
  1180       start = x;
       
  1181       goto restart;
       
  1182     }
       
  1183   }
       
  1184 
       
  1185   if (mbacq) {
       
  1186     return true;
       
  1187   }
       
  1188 
       
  1189   // only continue if we want to try to match unsafe volatile gets
       
  1190   if (UseBarriersForUnsafeVolatileGet)
       
  1191     return false;
       
  1192 
       
   1193   // check if the Ctl and Mem feeds come from a MemBarCPUOrder
       
  1194   //
       
  1195   //     MemBarCPUOrder
       
  1196   //        ||       \\
       
  1197   //   MemBarAcquire* LoadX[mo_acquire]
       
  1198   //        ||
       
  1199   //   MemBarCPUOrder
       
  1200 
       
  1201   MemBarNode *membar;
       
  1202   ProjNode *ctl;
       
  1203   ProjNode *mem;
       
  1204 
       
  1205   membar = has_parent_membar(ld, ctl, mem);
       
  1206 
       
   1207   if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
       
  1208     return false;
       
  1209 
       
  1210   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
       
  1211 
       
  1212   membar = has_child_membar(membar, ctl, mem);
       
  1213 
       
   1214   if (!membar || membar->Opcode() != Op_MemBarAcquire)
       
  1215     return false;
       
  1216 
       
  1217   membar = has_child_membar(membar, ctl, mem);
       
  1218   
       
   1219   if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
       
  1220     return false;
       
  1221 
       
  1222   return true;
       
  1223 }
       
  1224 
       
  1225 bool unnecessary_release(const Node *n) {
       
  1226   // assert n->is_MemBar();
       
  1227   if (UseBarriersForVolatile)
       
  1228     // we need to plant a dmb
       
  1229     return false;
       
  1230 
       
  1231   // ok, so we can omit this release barrier if it has been inserted
       
  1232   // as part of a volatile store sequence
       
  1233   //
       
  1234   //   MemBarRelease
       
  1235   //  {      ||      }
       
  1236   //  {MemBarCPUOrder} -- optional
       
  1237   //         ||     \\
       
  1238   //         ||     StoreX[mo_release]
       
  1239   //         | \     /
       
  1240   //         | MergeMem
       
  1241   //         | /
       
  1242   //   MemBarVolatile
       
  1243   //
       
  1244   // where
       
  1245   //  || and \\ represent Ctl and Mem feeds via Proj nodes
       
  1246   //  | \ and / indicate further routing of the Ctl and Mem feeds
       
  1247   // 
       
  1248   // so we need to check that
       
  1249   //
       
   1250   // i) the release membar (or its dependent cpuorder membar) feeds
       
  1251   // control to a store node (via a Control project node)
       
  1252   //
       
  1253   // ii) the store is ordered release
       
  1254   //
       
  1255   // iii) the release membar (or its dependent cpuorder membar) feeds
       
  1256   // control to a volatile membar (via the same Control project node)
       
  1257   //
       
  1258   // iv) the release membar feeds memory to a merge mem and to the
       
  1259   // same store (both via a single Memory proj node)
       
  1260   //
       
  1261   // v) the store outputs to the merge mem
       
  1262   //
       
  1263   // vi) the merge mem outputs to the same volatile membar
       
  1264   //
       
  1265   // n.b. if this is an inlined unsafe node then the release membar
       
  1266   // may feed its control and memory links via an intervening cpuorder
       
  1267   // membar. this case can be dealt with when we check the release
       
  1268   // membar projections. if they both feed a single cpuorder membar
       
  1269   // node continue to make the same checks as above but with the
       
  1270   // cpuorder membar substituted for the release membar. if they don't
       
  1271   // both feed a cpuorder membar then the check fails.
       
  1272   //
       
  1273   // n.b.b. for an inlined unsafe store of an object in the case where
       
  1274   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
       
  1275   // an embedded if then else where we expect the store. this is
       
  1276   // needed to do the right type of store depending on whether
       
  1277   // heap_base is NULL. We could check for that but for now we can
       
   1278   // just take the hit of inserting a redundant dmb for this
       
  1279   // redundant volatile membar
       
  1280 
       
  1281   MemBarNode *barrier = n->as_MemBar();
       
  1282   ProjNode *ctl;
       
  1283   ProjNode *mem;
       
  1284   // check for an intervening cpuorder membar
       
  1285   MemBarNode *b = has_child_membar(barrier, ctl, mem);
       
  1286   if (b && b->Opcode() == Op_MemBarCPUOrder) {
       
   1287     // ok, so start from the dependent cpuorder barrier
       
  1288     barrier = b;
       
  1289   }
       
  1290   // check the ctl and mem flow
       
  1291   ctl = barrier->proj_out(TypeFunc::Control);
       
  1292   mem = barrier->proj_out(TypeFunc::Memory);
       
  1293 
       
  1294   // the barrier needs to have both a Ctl and Mem projection
       
  1295   if (! ctl || ! mem)
       
  1296     return false;
       
  1297 
       
  1298   Node *x = NULL;
       
  1299   Node *mbvol = NULL;
       
  1300   StoreNode * st = NULL;
       
  1301 
       
  1302   // For a normal volatile write the Ctl ProjNode should have output
       
  1303   // to a MemBarVolatile and a Store marked as releasing
       
  1304   //
       
  1305   // n.b. for an inlined unsafe store of an object in the case where
       
  1306   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
       
  1307   // an embedded if then else where we expect the store. this is
       
  1308   // needed to do the right type of store depending on whether
       
  1309   // heap_base is NULL. We could check for that case too but for now
       
  1310   // we can just take the hit of inserting a dmb and a non-volatile
       
  1311   // store to implement the volatile store
       
  1312 
       
  1313   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
       
  1314     x = ctl->fast_out(i);
       
  1315     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
       
  1316       if (mbvol) {
       
  1317 	return false;
       
  1318       }
       
  1319       mbvol = x;
       
  1320     } else if (x->is_Store()) {
       
  1321       st = x->as_Store();
       
  1322       if (! st->is_release()) {
       
  1323 	return false;
       
  1324       }
       
  1325     } else if (!x->is_Mach()) {
       
  1326       // we may see mach nodes added during matching but nothing else
       
  1327       return false;
       
  1328     }
       
  1329   }
       
  1330 
       
  1331   if (!mbvol || !st)
       
  1332     return false;
       
  1333 
       
  1334   // the Mem ProjNode should output to a MergeMem and the same Store
       
  1335   Node *mm = NULL;
       
  1336   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       
  1337     x = mem->fast_out(i);
       
  1338     if (!mm && x->is_MergeMem()) {
       
  1339       mm = x;
       
  1340     } else if (x != st && !x->is_Mach()) {
       
  1341       // we may see mach nodes added during matching but nothing else
       
  1342       return false;
       
  1343     }
       
  1344   }
       
  1345 
       
  1346   if (!mm)
       
  1347     return false;
       
  1348 
       
  1349   // the MergeMem should output to the MemBarVolatile
       
  1350   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
       
  1351     x = mm->fast_out(i);
       
  1352     if (x != mbvol && !x->is_Mach()) {
       
  1353       // we may see mach nodes added during matching but nothing else
       
  1354       return false;
       
  1355     }
       
  1356   }
       
  1357 
       
  1358   return true;
       
  1359 }
       
  1360 
       
  1361 bool unnecessary_volatile(const Node *n) {
       
  1362   // assert n->is_MemBar();
       
  1363   if (UseBarriersForVolatile)
       
  1364     // we need to plant a dmb
       
  1365     return false;
       
  1366 
       
  1367   // ok, so we can omit this volatile barrier if it has been inserted
       
  1368   // as part of a volatile store sequence
       
  1369   //
       
  1370   //   MemBarRelease
       
  1371   //  {      ||      }
       
  1372   //  {MemBarCPUOrder} -- optional
       
  1373   //         ||     \\
       
  1374   //         ||     StoreX[mo_release]
       
  1375   //         | \     /
       
  1376   //         | MergeMem
       
  1377   //         | /
       
  1378   //   MemBarVolatile
       
  1379   //
       
  1380   // where
       
  1381   //  || and \\ represent Ctl and Mem feeds via Proj nodes
       
  1382   //  | \ and / indicate further routing of the Ctl and Mem feeds
       
  1383   // 
       
  1384   // we need to check that
       
  1385   //
       
  1386   // i) the volatile membar gets its control feed from a release
       
  1387   // membar (or its dependent cpuorder membar) via a Control project
       
  1388   // node
       
  1389   //
       
  1390   // ii) the release membar (or its dependent cpuorder membar) also
       
  1391   // feeds control to a store node via the same proj node
       
  1392   //
       
  1393   // iii) the store is ordered release
       
  1394   //
       
  1395   // iv) the release membar (or its dependent cpuorder membar) feeds
       
  1396   // memory to a merge mem and to the same store (both via a single
       
  1397   // Memory proj node)
       
  1398   //
       
  1399   // v) the store outputs to the merge mem
       
  1400   //
       
  1401   // vi) the merge mem outputs to the volatile membar
       
  1402   //
       
  1403   // n.b. for an inlined unsafe store of an object in the case where
       
  1404   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
       
  1405   // an embedded if then else where we expect the store. this is
       
  1406   // needed to do the right type of store depending on whether
       
  1407   // heap_base is NULL. We could check for that but for now we can
       
   1408   // just take the hit of inserting a redundant dmb for this
       
  1409   // redundant volatile membar
       
  1410 
       
  1411   MemBarNode *mbvol = n->as_MemBar();
       
  1412   Node *x = n->lookup(TypeFunc::Control);
       
  1413 
       
  1414   if (! x || !x->is_Proj())
       
  1415     return false;
       
  1416 
       
  1417   ProjNode *proj = x->as_Proj();
       
  1418 
       
  1419   x = proj->lookup(0);
       
  1420 
       
  1421   if (!x || !x->is_MemBar())
       
  1422     return false;
       
  1423 
       
  1424   MemBarNode *barrier = x->as_MemBar();
       
  1425 
       
  1426   // if the barrier is a release membar we have what we want. if it is
       
  1427   // a cpuorder membar then we need to ensure that it is fed by a
       
  1428   // release membar in which case we proceed to check the graph below
       
  1429   // this cpuorder membar as the feed
       
  1430 
       
  1431   if (x->Opcode() != Op_MemBarRelease) {
       
  1432     if (x->Opcode() != Op_MemBarCPUOrder)
       
  1433       return false;
       
  1434     ProjNode *ctl;
       
  1435     ProjNode *mem;
       
  1436     MemBarNode *b = has_parent_membar(x, ctl, mem);
       
   1437     if (!b || b->Opcode() != Op_MemBarRelease)
       
  1438       return false;
       
  1439   }
       
  1440 
       
  1441   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
       
  1442   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
       
  1443 
       
  1444   // barrier needs to have both a Ctl and Mem projection
       
  1445   // and we need to have reached it via the Ctl projection
       
  1446   if (! ctl || ! mem || ctl != proj)
       
  1447     return false;
       
  1448 
       
  1449   StoreNode * st = NULL;
       
  1450 
       
  1451   // The Ctl ProjNode should have output to a MemBarVolatile and
       
  1452   // a Store marked as releasing
       
  1453   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
       
  1454     x = ctl->fast_out(i);
       
  1455     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
       
  1456       if (x != mbvol) {
       
  1457 	return false;
       
  1458       }
       
  1459     } else if (x->is_Store()) {
       
  1460       st = x->as_Store();
       
  1461       if (! st->is_release()) {
       
  1462 	return false;
       
  1463       }
       
  1464     } else if (!x->is_Mach()){
       
  1465       // we may see mach nodes added during matching but nothing else
       
  1466       return false;
       
  1467     }
       
  1468   }
       
  1469 
       
  1470   if (!st)
       
  1471     return false;
       
  1472 
       
  1473   // the Mem ProjNode should output to a MergeMem and the same Store
       
  1474   Node *mm = NULL;
       
  1475   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       
  1476     x = mem->fast_out(i);
       
  1477     if (!mm && x->is_MergeMem()) {
       
  1478       mm = x;
       
  1479     } else if (x != st && !x->is_Mach()) {
       
  1480       // we may see mach nodes added during matching but nothing else
       
  1481       return false;
       
  1482     }
       
  1483   }
       
  1484 
       
  1485   if (!mm)
       
  1486     return false;
       
  1487 
       
  1488   // the MergeMem should output to the MemBarVolatile
       
  1489   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
       
  1490     x = mm->fast_out(i);
       
  1491     if (x != mbvol && !x->is_Mach()) {
       
  1492       // we may see mach nodes added during matching but nothing else
       
  1493       return false;
       
  1494     }
       
  1495   }
       
  1496 
       
  1497   return true;
       
  1498 }
       
  1499 
       
  1500 
       
  1501 
       
  1502 bool needs_releasing_store(const Node *n)
       
  1503 {
       
  1504   // assert n->is_Store();
       
  1505   if (UseBarriersForVolatile)
       
  1506     // we use a normal store and dmb combination
       
  1507     return false;
       
  1508 
       
  1509   StoreNode *st = n->as_Store();
       
  1510 
       
  1511   if (!st->is_release())
       
  1512     return false;
       
  1513 
       
  1514   // check if this store is bracketed by a release (or its dependent
       
  1515   // cpuorder membar) and a volatile membar
       
  1516   //
       
  1517   //   MemBarRelease
       
  1518   //  {      ||      }
       
  1519   //  {MemBarCPUOrder} -- optional
       
  1520   //         ||     \\
       
  1521   //         ||     StoreX[mo_release]
       
  1522   //         | \     /
       
  1523   //         | MergeMem
       
  1524   //         | /
       
  1525   //   MemBarVolatile
       
  1526   //
       
  1527   // where
       
  1528   //  || and \\ represent Ctl and Mem feeds via Proj nodes
       
  1529   //  | \ and / indicate further routing of the Ctl and Mem feeds
       
  1530   // 
       
  1531 
       
  1532 
       
  1533   Node *x = st->lookup(TypeFunc::Control);
       
  1534 
       
  1535   if (! x || !x->is_Proj())
       
  1536     return false;
       
  1537 
       
  1538   ProjNode *proj = x->as_Proj();
       
  1539 
       
  1540   x = proj->lookup(0);
       
  1541 
       
  1542   if (!x || !x->is_MemBar())
       
  1543     return false;
       
  1544 
       
  1545   MemBarNode *barrier = x->as_MemBar();
       
  1546 
       
  1547   // if the barrier is a release membar we have what we want. if it is
       
  1548   // a cpuorder membar then we need to ensure that it is fed by a
       
  1549   // release membar in which case we proceed to check the graph below
       
  1550   // this cpuorder membar as the feed
       
  1551 
       
  1552   if (x->Opcode() != Op_MemBarRelease) {
       
  1553     if (x->Opcode() != Op_MemBarCPUOrder)
       
  1554       return false;
       
  1555     Node *ctl = x->lookup(TypeFunc::Control);
       
  1556     Node *mem = x->lookup(TypeFunc::Memory);
       
  1557     if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj())
       
  1558       return false;
       
  1559     x = ctl->lookup(0);
       
   1560     if (!x || !x->is_MemBar() || x->Opcode() != Op_MemBarRelease)
       
  1561       return false;
       
  1562     Node *y = mem->lookup(0);
       
  1563     if (!y || y != x)
       
  1564       return false;
       
  1565   }
       
  1566 
       
  1567   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
       
  1568   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
       
  1569 
       
  1570   // MemBarRelease needs to have both a Ctl and Mem projection
       
  1571   // and we need to have reached it via the Ctl projection
       
  1572   if (! ctl || ! mem || ctl != proj)
       
  1573     return false;
       
  1574 
       
  1575   MemBarNode *mbvol = NULL;
       
  1576 
       
  1577   // The Ctl ProjNode should have output to a MemBarVolatile and
       
  1578   // a Store marked as releasing
       
  1579   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
       
  1580     x = ctl->fast_out(i);
       
  1581     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
       
  1582       mbvol = x->as_MemBar();
       
  1583     } else if (x->is_Store()) {
       
  1584       if (x != st) {
       
  1585 	return false;
       
  1586       }
       
  1587     } else if (!x->is_Mach()){
       
  1588       return false;
       
  1589     }
       
  1590   }
       
  1591 
       
  1592   if (!mbvol)
       
  1593     return false;
       
  1594 
       
  1595   // the Mem ProjNode should output to a MergeMem and the same Store
       
  1596   Node *mm = NULL;
       
  1597   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       
  1598     x = mem->fast_out(i);
       
  1599     if (!mm && x->is_MergeMem()) {
       
  1600       mm = x;
       
  1601     } else if (x != st && !x->is_Mach()) {
       
  1602       return false;
       
  1603     }
       
  1604   }
       
  1605 
       
  1606   if (!mm)
       
  1607     return false;
       
  1608 
       
  1609   // the MergeMem should output to the MemBarVolatile
       
  1610   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
       
  1611     x = mm->fast_out(i);
       
  1612     if (x != mbvol && !x->is_Mach()) {
       
  1613       return false;
       
  1614     }
       
  1615   }
       
  1616 
       
  1617   return true;
       
  1618 }
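On the store side the same approach would pair a stlr<x>-based store rule, guarded by needs_releasing_store, with membar rules that use unnecessary_release and unnecessary_volatile to suppress the surrounding dmbs. A rough sketch of the store half, with a hypothetical rule name and encoding (as with ldar<x>, stlr<x> only takes a base register, so a real rule would need a narrower memory operand):

  // sketch only: releasing variant of the long store, selected when the
  // volatile write signature is matched (name and encoding are hypothetical)
  instruct storeL_rel(iRegL src, memory mem)
  %{
    match(Set mem (StoreL mem src));
    predicate(needs_releasing_store(n));

    ins_cost(INSN_COST);
    format %{ "stlr  $src, $mem\t# long" %}

    // hypothetical encoding which emits stlr rather than str
    ins_encode(aarch64_enc_stlr(src, mem));
  %}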
       
  1619 
       
  1620 
   827 
  1621 
   828 #define __ _masm.
  1622 #define __ _masm.
   829 
  1623 
    830 // forward declarations for helper functions to convert register
   1624 // forward declarations for helper functions to convert register
   831 // indices to register objects
  1625 // indices to register objects
  5149 
  5943 
  5150 // Load Byte (8 bit signed)
  5944 // Load Byte (8 bit signed)
  5151 instruct loadB(iRegINoSp dst, memory mem)
  5945 instruct loadB(iRegINoSp dst, memory mem)
  5152 %{
  5946 %{
  5153   match(Set dst (LoadB mem));
  5947   match(Set dst (LoadB mem));
  5154   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  5948   predicate(!needs_acquiring_load(n));
  5155 
  5949 
  5156   ins_cost(4 * INSN_COST);
  5950   ins_cost(4 * INSN_COST);
  5157   format %{ "ldrsbw  $dst, $mem\t# byte" %}
  5951   format %{ "ldrsbw  $dst, $mem\t# byte" %}
  5158 
  5952 
  5159   ins_encode(aarch64_enc_ldrsbw(dst, mem));
  5953   ins_encode(aarch64_enc_ldrsbw(dst, mem));
  5163 
  5957 
  5164 // Load Byte (8 bit signed) into long
  5958 // Load Byte (8 bit signed) into long
  5165 instruct loadB2L(iRegLNoSp dst, memory mem)
  5959 instruct loadB2L(iRegLNoSp dst, memory mem)
  5166 %{
  5960 %{
  5167   match(Set dst (ConvI2L (LoadB mem)));
  5961   match(Set dst (ConvI2L (LoadB mem)));
  5168   predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
  5962   predicate(!needs_acquiring_load(n->in(1)));
  5169 
  5963 
  5170   ins_cost(4 * INSN_COST);
  5964   ins_cost(4 * INSN_COST);
  5171   format %{ "ldrsb  $dst, $mem\t# byte" %}
  5965   format %{ "ldrsb  $dst, $mem\t# byte" %}
  5172 
  5966 
  5173   ins_encode(aarch64_enc_ldrsb(dst, mem));
  5967   ins_encode(aarch64_enc_ldrsb(dst, mem));
  5177 
  5971 
  5178 // Load Byte (8 bit unsigned)
  5972 // Load Byte (8 bit unsigned)
  5179 instruct loadUB(iRegINoSp dst, memory mem)
  5973 instruct loadUB(iRegINoSp dst, memory mem)
  5180 %{
  5974 %{
  5181   match(Set dst (LoadUB mem));
  5975   match(Set dst (LoadUB mem));
  5182   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  5976   predicate(!needs_acquiring_load(n));
  5183 
  5977 
  5184   ins_cost(4 * INSN_COST);
  5978   ins_cost(4 * INSN_COST);
  5185   format %{ "ldrbw  $dst, $mem\t# byte" %}
  5979   format %{ "ldrbw  $dst, $mem\t# byte" %}
  5186 
  5980 
  5187   ins_encode(aarch64_enc_ldrb(dst, mem));
  5981   ins_encode(aarch64_enc_ldrb(dst, mem));
  5191 
  5985 
  5192 // Load Byte (8 bit unsigned) into long
  5986 // Load Byte (8 bit unsigned) into long
  5193 instruct loadUB2L(iRegLNoSp dst, memory mem)
  5987 instruct loadUB2L(iRegLNoSp dst, memory mem)
  5194 %{
  5988 %{
  5195   match(Set dst (ConvI2L (LoadUB mem)));
  5989   match(Set dst (ConvI2L (LoadUB mem)));
  5196   predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
  5990   predicate(!needs_acquiring_load(n->in(1)));
  5197 
  5991 
  5198   ins_cost(4 * INSN_COST);
  5992   ins_cost(4 * INSN_COST);
  5199   format %{ "ldrb  $dst, $mem\t# byte" %}
  5993   format %{ "ldrb  $dst, $mem\t# byte" %}
  5200 
  5994 
  5201   ins_encode(aarch64_enc_ldrb(dst, mem));
  5995   ins_encode(aarch64_enc_ldrb(dst, mem));
  5205 
  5999 
  5206 // Load Short (16 bit signed)
  6000 // Load Short (16 bit signed)
  5207 instruct loadS(iRegINoSp dst, memory mem)
  6001 instruct loadS(iRegINoSp dst, memory mem)
  5208 %{
  6002 %{
  5209   match(Set dst (LoadS mem));
  6003   match(Set dst (LoadS mem));
  5210   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6004   predicate(!needs_acquiring_load(n));
  5211 
  6005 
  5212   ins_cost(4 * INSN_COST);
  6006   ins_cost(4 * INSN_COST);
  5213   format %{ "ldrshw  $dst, $mem\t# short" %}
  6007   format %{ "ldrshw  $dst, $mem\t# short" %}
  5214 
  6008 
  5215   ins_encode(aarch64_enc_ldrshw(dst, mem));
  6009   ins_encode(aarch64_enc_ldrshw(dst, mem));
  5219 
  6013 
  5220 // Load Short (16 bit signed) into long
  6014 // Load Short (16 bit signed) into long
  5221 instruct loadS2L(iRegLNoSp dst, memory mem)
  6015 instruct loadS2L(iRegLNoSp dst, memory mem)
  5222 %{
  6016 %{
  5223   match(Set dst (ConvI2L (LoadS mem)));
  6017   match(Set dst (ConvI2L (LoadS mem)));
  5224   predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
  6018   predicate(!needs_acquiring_load(n->in(1)));
  5225 
  6019 
  5226   ins_cost(4 * INSN_COST);
  6020   ins_cost(4 * INSN_COST);
  5227   format %{ "ldrsh  $dst, $mem\t# short" %}
  6021   format %{ "ldrsh  $dst, $mem\t# short" %}
  5228 
  6022 
  5229   ins_encode(aarch64_enc_ldrsh(dst, mem));
  6023   ins_encode(aarch64_enc_ldrsh(dst, mem));
  5233 
  6027 
  5234 // Load Char (16 bit unsigned)
  6028 // Load Char (16 bit unsigned)
  5235 instruct loadUS(iRegINoSp dst, memory mem)
  6029 instruct loadUS(iRegINoSp dst, memory mem)
  5236 %{
  6030 %{
  5237   match(Set dst (LoadUS mem));
  6031   match(Set dst (LoadUS mem));
  5238   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6032   predicate(!needs_acquiring_load(n));
  5239 
  6033 
  5240   ins_cost(4 * INSN_COST);
  6034   ins_cost(4 * INSN_COST);
  5241   format %{ "ldrh  $dst, $mem\t# short" %}
  6035   format %{ "ldrh  $dst, $mem\t# short" %}
  5242 
  6036 
  5243   ins_encode(aarch64_enc_ldrh(dst, mem));
  6037   ins_encode(aarch64_enc_ldrh(dst, mem));
  5247 
  6041 
  5248 // Load Short/Char (16 bit unsigned) into long
  6042 // Load Short/Char (16 bit unsigned) into long
  5249 instruct loadUS2L(iRegLNoSp dst, memory mem)
  6043 instruct loadUS2L(iRegLNoSp dst, memory mem)
  5250 %{
  6044 %{
  5251   match(Set dst (ConvI2L (LoadUS mem)));
  6045   match(Set dst (ConvI2L (LoadUS mem)));
  5252   predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
  6046   predicate(!needs_acquiring_load(n->in(1)));
  5253 
  6047 
  5254   ins_cost(4 * INSN_COST);
  6048   ins_cost(4 * INSN_COST);
  5255   format %{ "ldrh  $dst, $mem\t# short" %}
  6049   format %{ "ldrh  $dst, $mem\t# short" %}
  5256 
  6050 
  5257   ins_encode(aarch64_enc_ldrh(dst, mem));
  6051   ins_encode(aarch64_enc_ldrh(dst, mem));
  5261 
  6055 
  5262 // Load Integer (32 bit signed)
  6056 // Load Integer (32 bit signed)
  5263 instruct loadI(iRegINoSp dst, memory mem)
  6057 instruct loadI(iRegINoSp dst, memory mem)
  5264 %{
  6058 %{
  5265   match(Set dst (LoadI mem));
  6059   match(Set dst (LoadI mem));
  5266   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6060   predicate(!needs_acquiring_load(n));
  5267 
  6061 
  5268   ins_cost(4 * INSN_COST);
  6062   ins_cost(4 * INSN_COST);
  5269   format %{ "ldrw  $dst, $mem\t# int" %}
  6063   format %{ "ldrw  $dst, $mem\t# int" %}
  5270 
  6064 
  5271   ins_encode(aarch64_enc_ldrw(dst, mem));
  6065   ins_encode(aarch64_enc_ldrw(dst, mem));
  5275 
  6069 
  5276 // Load Integer (32 bit signed) into long
  6070 // Load Integer (32 bit signed) into long
  5277 instruct loadI2L(iRegLNoSp dst, memory mem)
  6071 instruct loadI2L(iRegLNoSp dst, memory mem)
  5278 %{
  6072 %{
  5279   match(Set dst (ConvI2L (LoadI mem)));
  6073   match(Set dst (ConvI2L (LoadI mem)));
  5280   predicate(UseBarriersForVolatile || n->in(1)->as_Load()->is_unordered());
  6074   predicate(!needs_acquiring_load(n->in(1)));
  5281 
  6075 
  5282   ins_cost(4 * INSN_COST);
  6076   ins_cost(4 * INSN_COST);
  5283   format %{ "ldrsw  $dst, $mem\t# int" %}
  6077   format %{ "ldrsw  $dst, $mem\t# int" %}
  5284 
  6078 
  5285   ins_encode(aarch64_enc_ldrsw(dst, mem));
  6079   ins_encode(aarch64_enc_ldrsw(dst, mem));
  5289 
  6083 
  5290 // Load Integer (32 bit unsigned) into long
  6084 // Load Integer (32 bit unsigned) into long
  5291 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
  6085 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
  5292 %{
  6086 %{
  5293   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  6087   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  5294   predicate(UseBarriersForVolatile || n->in(1)->in(1)->as_Load()->is_unordered());
  6088   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
  5295 
  6089 
  5296   ins_cost(4 * INSN_COST);
  6090   ins_cost(4 * INSN_COST);
  5297   format %{ "ldrw  $dst, $mem\t# int" %}
  6091   format %{ "ldrw  $dst, $mem\t# int" %}
  5298 
  6092 
  5299   ins_encode(aarch64_enc_ldrw(dst, mem));
  6093   ins_encode(aarch64_enc_ldrw(dst, mem));
  5303 
  6097 
  5304 // Load Long (64 bit signed)
  6098 // Load Long (64 bit signed)
  5305 instruct loadL(iRegLNoSp dst, memory mem)
  6099 instruct loadL(iRegLNoSp dst, memory mem)
  5306 %{
  6100 %{
  5307   match(Set dst (LoadL mem));
  6101   match(Set dst (LoadL mem));
  5308   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6102   predicate(!needs_acquiring_load(n));
  5309 
  6103 
  5310   ins_cost(4 * INSN_COST);
  6104   ins_cost(4 * INSN_COST);
   5311   format %{ "ldr  $dst, $mem\t# long" %}
   6105   format %{ "ldr  $dst, $mem\t# long" %}
  5312 
  6106 
  5313   ins_encode(aarch64_enc_ldr(dst, mem));
  6107   ins_encode(aarch64_enc_ldr(dst, mem));
  5330 
  6124 
  5331 // Load Pointer
  6125 // Load Pointer
  5332 instruct loadP(iRegPNoSp dst, memory mem)
  6126 instruct loadP(iRegPNoSp dst, memory mem)
  5333 %{
  6127 %{
  5334   match(Set dst (LoadP mem));
  6128   match(Set dst (LoadP mem));
  5335   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6129   predicate(!needs_acquiring_load(n));
  5336 
  6130 
  5337   ins_cost(4 * INSN_COST);
  6131   ins_cost(4 * INSN_COST);
  5338   format %{ "ldr  $dst, $mem\t# ptr" %}
  6132   format %{ "ldr  $dst, $mem\t# ptr" %}
  5339 
  6133 
  5340   ins_encode(aarch64_enc_ldr(dst, mem));
  6134   ins_encode(aarch64_enc_ldr(dst, mem));
  5344 
  6138 
  5345 // Load Compressed Pointer
  6139 // Load Compressed Pointer
  5346 instruct loadN(iRegNNoSp dst, memory mem)
  6140 instruct loadN(iRegNNoSp dst, memory mem)
  5347 %{
  6141 %{
  5348   match(Set dst (LoadN mem));
  6142   match(Set dst (LoadN mem));
  5349   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6143   predicate(!needs_acquiring_load(n));
  5350 
  6144 
  5351   ins_cost(4 * INSN_COST);
  6145   ins_cost(4 * INSN_COST);
  5352   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
  6146   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
  5353 
  6147 
  5354   ins_encode(aarch64_enc_ldrw(dst, mem));
  6148   ins_encode(aarch64_enc_ldrw(dst, mem));
  5358 
  6152 
  5359 // Load Klass Pointer
  6153 // Load Klass Pointer
  5360 instruct loadKlass(iRegPNoSp dst, memory mem)
  6154 instruct loadKlass(iRegPNoSp dst, memory mem)
  5361 %{
  6155 %{
  5362   match(Set dst (LoadKlass mem));
  6156   match(Set dst (LoadKlass mem));
  5363   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6157   predicate(!needs_acquiring_load(n));
  5364 
  6158 
  5365   ins_cost(4 * INSN_COST);
  6159   ins_cost(4 * INSN_COST);
  5366   format %{ "ldr  $dst, $mem\t# class" %}
  6160   format %{ "ldr  $dst, $mem\t# class" %}
  5367 
  6161 
  5368   ins_encode(aarch64_enc_ldr(dst, mem));
  6162   ins_encode(aarch64_enc_ldr(dst, mem));
  5372 
  6166 
  5373 // Load Narrow Klass Pointer
  6167 // Load Narrow Klass Pointer
  5374 instruct loadNKlass(iRegNNoSp dst, memory mem)
  6168 instruct loadNKlass(iRegNNoSp dst, memory mem)
  5375 %{
  6169 %{
  5376   match(Set dst (LoadNKlass mem));
  6170   match(Set dst (LoadNKlass mem));
  5377   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6171   predicate(!needs_acquiring_load(n));
  5378 
  6172 
  5379   ins_cost(4 * INSN_COST);
  6173   ins_cost(4 * INSN_COST);
  5380   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
  6174   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
  5381 
  6175 
  5382   ins_encode(aarch64_enc_ldrw(dst, mem));
  6176   ins_encode(aarch64_enc_ldrw(dst, mem));
  5386 
  6180 
  5387 // Load Float
  6181 // Load Float
  5388 instruct loadF(vRegF dst, memory mem)
  6182 instruct loadF(vRegF dst, memory mem)
  5389 %{
  6183 %{
  5390   match(Set dst (LoadF mem));
  6184   match(Set dst (LoadF mem));
  5391   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6185   predicate(!needs_acquiring_load(n));
  5392 
  6186 
  5393   ins_cost(4 * INSN_COST);
  6187   ins_cost(4 * INSN_COST);
  5394   format %{ "ldrs  $dst, $mem\t# float" %}
  6188   format %{ "ldrs  $dst, $mem\t# float" %}
  5395 
  6189 
  5396   ins_encode( aarch64_enc_ldrs(dst, mem) );
  6190   ins_encode( aarch64_enc_ldrs(dst, mem) );
  5400 
  6194 
  5401 // Load Double
  6195 // Load Double
  5402 instruct loadD(vRegD dst, memory mem)
  6196 instruct loadD(vRegD dst, memory mem)
  5403 %{
  6197 %{
  5404   match(Set dst (LoadD mem));
  6198   match(Set dst (LoadD mem));
  5405   predicate(UseBarriersForVolatile || n->as_Load()->is_unordered());
  6199   predicate(!needs_acquiring_load(n));
  5406 
  6200 
  5407   ins_cost(4 * INSN_COST);
  6201   ins_cost(4 * INSN_COST);
  5408   format %{ "ldrd  $dst, $mem\t# double" %}
  6202   format %{ "ldrd  $dst, $mem\t# double" %}
  5409 
  6203 
  5410   ins_encode( aarch64_enc_ldrd(dst, mem) );
  6204   ins_encode( aarch64_enc_ldrd(dst, mem) );
  5631 
  6425 
  5632 // Store Byte
  6426 // Store Byte
  5633 instruct storeB(iRegIorL2I src, memory mem)
  6427 instruct storeB(iRegIorL2I src, memory mem)
  5634 %{
  6428 %{
  5635   match(Set mem (StoreB mem src));
  6429   match(Set mem (StoreB mem src));
  5636   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6430   predicate(!needs_releasing_store(n));
  5637 
  6431 
  5638   ins_cost(INSN_COST);
  6432   ins_cost(INSN_COST);
  5639   format %{ "strb  $src, $mem\t# byte" %}
  6433   format %{ "strb  $src, $mem\t# byte" %}
  5640 
  6434 
  5641   ins_encode(aarch64_enc_strb(src, mem));
  6435   ins_encode(aarch64_enc_strb(src, mem));
  5645 
  6439 
  5646 
  6440 
  5647 instruct storeimmB0(immI0 zero, memory mem)
  6441 instruct storeimmB0(immI0 zero, memory mem)
  5648 %{
  6442 %{
  5649   match(Set mem (StoreB mem zero));
  6443   match(Set mem (StoreB mem zero));
  5650   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6444   predicate(!needs_releasing_store(n));
  5651 
  6445 
  5652   ins_cost(INSN_COST);
  6446   ins_cost(INSN_COST);
  5653   format %{ "strb zr, $mem\t# byte" %}
  6447   format %{ "strb zr, $mem\t# byte" %}
  5654 
  6448 
  5655   ins_encode(aarch64_enc_strb0(mem));
  6449   ins_encode(aarch64_enc_strb0(mem));
  5659 
  6453 
  5660 // Store Char/Short
  6454 // Store Char/Short
  5661 instruct storeC(iRegIorL2I src, memory mem)
  6455 instruct storeC(iRegIorL2I src, memory mem)
  5662 %{
  6456 %{
  5663   match(Set mem (StoreC mem src));
  6457   match(Set mem (StoreC mem src));
  5664   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6458   predicate(!needs_releasing_store(n));
  5665 
  6459 
  5666   ins_cost(INSN_COST);
  6460   ins_cost(INSN_COST);
  5667   format %{ "strh  $src, $mem\t# short" %}
  6461   format %{ "strh  $src, $mem\t# short" %}
  5668 
  6462 
  5669   ins_encode(aarch64_enc_strh(src, mem));
  6463   ins_encode(aarch64_enc_strh(src, mem));
  5672 %}
  6466 %}
  5673 
  6467 
  5674 instruct storeimmC0(immI0 zero, memory mem)
  6468 instruct storeimmC0(immI0 zero, memory mem)
  5675 %{
  6469 %{
  5676   match(Set mem (StoreC mem zero));
  6470   match(Set mem (StoreC mem zero));
  5677   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6471   predicate(!needs_releasing_store(n));
  5678 
  6472 
  5679   ins_cost(INSN_COST);
  6473   ins_cost(INSN_COST);
  5680   format %{ "strh  zr, $mem\t# short" %}
  6474   format %{ "strh  zr, $mem\t# short" %}
  5681 
  6475 
  5682   ins_encode(aarch64_enc_strh0(mem));
  6476   ins_encode(aarch64_enc_strh0(mem));
  5687 // Store Integer
  6481 // Store Integer
  5688 
  6482 
  5689 instruct storeI(iRegIorL2I src, memory mem)
  6483 instruct storeI(iRegIorL2I src, memory mem)
  5690 %{
  6484 %{
  5691   match(Set mem(StoreI mem src));
  6485   match(Set mem(StoreI mem src));
  5692   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6486   predicate(!needs_releasing_store(n));
  5693 
  6487 
  5694   ins_cost(INSN_COST);
  6488   ins_cost(INSN_COST);
  5695   format %{ "strw  $src, $mem\t# int" %}
  6489   format %{ "strw  $src, $mem\t# int" %}
  5696 
  6490 
  5697   ins_encode(aarch64_enc_strw(src, mem));
  6491   ins_encode(aarch64_enc_strw(src, mem));
  5700 %}
  6494 %}
  5701 
  6495 
  5702 instruct storeimmI0(immI0 zero, memory mem)
  6496 instruct storeimmI0(immI0 zero, memory mem)
  5703 %{
  6497 %{
  5704   match(Set mem(StoreI mem zero));
  6498   match(Set mem(StoreI mem zero));
  5705   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6499   predicate(!needs_releasing_store(n));
  5706 
  6500 
  5707   ins_cost(INSN_COST);
  6501   ins_cost(INSN_COST);
  5708   format %{ "strw  zr, $mem\t# int" %}
  6502   format %{ "strw  zr, $mem\t# int" %}
  5709 
  6503 
  5710   ins_encode(aarch64_enc_strw0(mem));
  6504   ins_encode(aarch64_enc_strw0(mem));
  5714 
  6508 
  5715 // Store Long (64 bit signed)
  6509 // Store Long (64 bit signed)
  5716 instruct storeL(iRegL src, memory mem)
  6510 instruct storeL(iRegL src, memory mem)
  5717 %{
  6511 %{
  5718   match(Set mem (StoreL mem src));
  6512   match(Set mem (StoreL mem src));
  5719   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6513   predicate(!needs_releasing_store(n));
  5720 
  6514 
  5721   ins_cost(INSN_COST);
  6515   ins_cost(INSN_COST);
   5722   format %{ "str  $src, $mem\t# long" %}
   6516   format %{ "str  $src, $mem\t# long" %}
  5723 
  6517 
  5724   ins_encode(aarch64_enc_str(src, mem));
  6518   ins_encode(aarch64_enc_str(src, mem));
  5728 
  6522 
  5729 // Store Long (64 bit signed)
  6523 // Store Long (64 bit signed)
  5730 instruct storeimmL0(immL0 zero, memory mem)
  6524 instruct storeimmL0(immL0 zero, memory mem)
  5731 %{
  6525 %{
  5732   match(Set mem (StoreL mem zero));
  6526   match(Set mem (StoreL mem zero));
  5733   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6527   predicate(!needs_releasing_store(n));
  5734 
  6528 
  5735   ins_cost(INSN_COST);
  6529   ins_cost(INSN_COST);
   5736   format %{ "str  zr, $mem\t# long" %}
   6530   format %{ "str  zr, $mem\t# long" %}
  5737 
  6531 
  5738   ins_encode(aarch64_enc_str0(mem));
  6532   ins_encode(aarch64_enc_str0(mem));
  5742 
  6536 
  5743 // Store Pointer
  6537 // Store Pointer
  5744 instruct storeP(iRegP src, memory mem)
  6538 instruct storeP(iRegP src, memory mem)
  5745 %{
  6539 %{
  5746   match(Set mem (StoreP mem src));
  6540   match(Set mem (StoreP mem src));
  5747   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6541   predicate(!needs_releasing_store(n));
  5748 
  6542 
  5749   ins_cost(INSN_COST);
  6543   ins_cost(INSN_COST);
  5750   format %{ "str  $src, $mem\t# ptr" %}
  6544   format %{ "str  $src, $mem\t# ptr" %}
  5751 
  6545 
  5752   ins_encode(aarch64_enc_str(src, mem));
  6546   ins_encode(aarch64_enc_str(src, mem));
  5756 
  6550 
  5757 // Store Pointer
  6551 // Store Pointer
  5758 instruct storeimmP0(immP0 zero, memory mem)
  6552 instruct storeimmP0(immP0 zero, memory mem)
  5759 %{
  6553 %{
  5760   match(Set mem (StoreP mem zero));
  6554   match(Set mem (StoreP mem zero));
  5761   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6555   predicate(!needs_releasing_store(n));
  5762 
  6556 
  5763   ins_cost(INSN_COST);
  6557   ins_cost(INSN_COST);
  5764   format %{ "str zr, $mem\t# ptr" %}
  6558   format %{ "str zr, $mem\t# ptr" %}
  5765 
  6559 
  5766   ins_encode(aarch64_enc_str0(mem));
  6560   ins_encode(aarch64_enc_str0(mem));
  5770 
  6564 
  5771 // Store Compressed Pointer
  6565 // Store Compressed Pointer
  5772 instruct storeN(iRegN src, memory mem)
  6566 instruct storeN(iRegN src, memory mem)
  5773 %{
  6567 %{
  5774   match(Set mem (StoreN mem src));
  6568   match(Set mem (StoreN mem src));
  5775   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6569   predicate(!needs_releasing_store(n));
  5776 
  6570 
  5777   ins_cost(INSN_COST);
  6571   ins_cost(INSN_COST);
  5778   format %{ "strw  $src, $mem\t# compressed ptr" %}
  6572   format %{ "strw  $src, $mem\t# compressed ptr" %}
  5779 
  6573 
  5780   ins_encode(aarch64_enc_strw(src, mem));
  6574   ins_encode(aarch64_enc_strw(src, mem));
  5785 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
  6579 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
  5786 %{
  6580 %{
  5787   match(Set mem (StoreN mem zero));
  6581   match(Set mem (StoreN mem zero));
  5788   predicate(Universe::narrow_oop_base() == NULL &&
  6582   predicate(Universe::narrow_oop_base() == NULL &&
  5789             Universe::narrow_klass_base() == NULL &&
  6583             Universe::narrow_klass_base() == NULL &&
  5790             (UseBarriersForVolatile || n->as_Store()->is_unordered()));
  6584             (!needs_releasing_store(n)));
  5791 
  6585 
  5792   ins_cost(INSN_COST);
  6586   ins_cost(INSN_COST);
  5793   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
  6587   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
  5794 
  6588 
  5795   ins_encode(aarch64_enc_strw(heapbase, mem));
  6589   ins_encode(aarch64_enc_strw(heapbase, mem));
  5799 
  6593 
  5800 // Store Float
  6594 // Store Float
  5801 instruct storeF(vRegF src, memory mem)
  6595 instruct storeF(vRegF src, memory mem)
  5802 %{
  6596 %{
  5803   match(Set mem (StoreF mem src));
  6597   match(Set mem (StoreF mem src));
  5804   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6598   predicate(!needs_releasing_store(n));
  5805 
  6599 
  5806   ins_cost(INSN_COST);
  6600   ins_cost(INSN_COST);
  5807   format %{ "strs  $src, $mem\t# float" %}
  6601   format %{ "strs  $src, $mem\t# float" %}
  5808 
  6602 
  5809   ins_encode( aarch64_enc_strs(src, mem) );
  6603   ins_encode( aarch64_enc_strs(src, mem) );
  5816 
  6610 
  5817 // Store Double
  6611 // Store Double
  5818 instruct storeD(vRegD src, memory mem)
  6612 instruct storeD(vRegD src, memory mem)
  5819 %{
  6613 %{
  5820   match(Set mem (StoreD mem src));
  6614   match(Set mem (StoreD mem src));
  5821   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6615   predicate(!needs_releasing_store(n));
  5822 
  6616 
  5823   ins_cost(INSN_COST);
  6617   ins_cost(INSN_COST);
  5824   format %{ "strd  $src, $mem\t# double" %}
  6618   format %{ "strd  $src, $mem\t# double" %}
  5825 
  6619 
  5826   ins_encode( aarch64_enc_strd(src, mem) );
  6620   ins_encode( aarch64_enc_strd(src, mem) );
  5829 %}
  6623 %}
  5830 
  6624 
  5831 // Store Compressed Klass Pointer
  6625 // Store Compressed Klass Pointer
  5832 instruct storeNKlass(iRegN src, memory mem)
  6626 instruct storeNKlass(iRegN src, memory mem)
  5833 %{
  6627 %{
  5834   predicate(UseBarriersForVolatile || n->as_Store()->is_unordered());
  6628   predicate(!needs_releasing_store(n));
  5835   match(Set mem (StoreNKlass mem src));
  6629   match(Set mem (StoreNKlass mem src));
  5836 
  6630 
  5837   ins_cost(INSN_COST);
  6631   ins_cost(INSN_COST);
  5838   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
  6632   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
  5839 
  6633 
  6291   %}
  7085   %}
  6292   ins_pipe(pipe_serial);
  7086   ins_pipe(pipe_serial);
  6293 %}
  7087 %}
  6294 
  7088 
  6295 instruct unnecessary_membar_acquire() %{
  7089 instruct unnecessary_membar_acquire() %{
  6296   predicate(! UseBarriersForVolatile && preceded_by_ordered_load(n));
  7090   predicate(unnecessary_acquire(n));
  6297   match(MemBarAcquire);
  7091   match(MemBarAcquire);
  6298   ins_cost(0);
  7092   ins_cost(0);
  6299 
  7093 
  6300   format %{ "membar_acquire (elided)" %}
  7094   format %{ "membar_acquire (elided)" %}
  6301 
  7095 
  6343     __ membar(Assembler::LoadStore|Assembler::StoreStore);
  7137     __ membar(Assembler::LoadStore|Assembler::StoreStore);
  6344   %}
  7138   %}
  6345   ins_pipe(pipe_serial);
  7139   ins_pipe(pipe_serial);
  6346 %}
  7140 %}
  6347 
  7141 
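The unnecessary_membar_acquire rule above competes with the ordinary membar_acquire rule for the same MemBarAcquire node but at zero cost, and its encoding emits nothing at all; it is selected only when unnecessary_acquire(n) decides the barrier is redundant because the load feeding it will already be emitted as ldar<x>. The real predicate performs a careful ideal-graph walk; the sketch below is only meant to show the shape of that decision and is closer in spirit to the older preceded_by_ordered_load() test being replaced than to the new code:

    // simplified sketch, not the actual predicate
    bool acquire_is_redundant_sketch(const Node *barrier)
    {
      // explicit dmb sequences requested: keep the barrier
      if (UseBarriersForVolatile)
        return false;

      // the value flowing into the MemBarAcquire's precedence slot
      Node *x = barrier->lookup(TypeFunc::Parms);
      if (x == NULL)
        return false;

      // look through a compressed-oop decode if present
      if (x->is_DecodeNarrowPtr())
        x = x->in(1);

      // redundant only if that value comes from an acquiring (ordered) load,
      // since that load will be emitted as ldar<x>
      return x->is_Load() && !x->as_Load()->is_unordered();
    }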
       
  7142 instruct unnecessary_membar_release() %{
       
  7143   predicate(unnecessary_release(n));
       
  7144   match(MemBarRelease);
       
  7145   ins_cost(0);
       
  7146 
       
  7147   format %{ "membar_release (elided)" %}
       
  7148 
       
  7149   ins_encode %{
       
  7150     __ block_comment("membar_release (elided)");
       
  7151   %}
       
  7152   ins_pipe(pipe_serial);
       
  7153 %}
       
  7154 
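Both elided membar rules (release above, volatile further below) carry ins_cost(0) and emit no instructions, while the ordinary forms that follow them cost VOLATILE_REF_COST, so the matcher picks the elided variant exactly when its predicate fires. The only trace left in the generated code is an assembler annotation, which keeps -XX:+PrintAssembly output readable. Illustratively, using the same block_comment() call that the encodings above already use:

    // illustration only: an elided barrier contributes no instructions,
    // just a comment visible in disassembly listings
    static void emit_elided_barrier(MacroAssembler *masm, const char *what)
    {
      masm->block_comment(what);   // e.g. "membar_release (elided)"
    }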
  6348 instruct membar_release() %{
  7155 instruct membar_release() %{
  6349   match(MemBarRelease);
  7156   match(MemBarRelease);
  6350   ins_cost(VOLATILE_REF_COST);
  7157   ins_cost(VOLATILE_REF_COST);
  6351 
  7158 
  6352   format %{ "membar_release" %}
  7159   format %{ "membar_release" %}
  6375 
  7182 
  6376   format %{ "membar_release_lock" %}
  7183   format %{ "membar_release_lock" %}
  6377 
  7184 
  6378   ins_encode %{
  7185   ins_encode %{
  6379     __ membar(Assembler::LoadStore|Assembler::StoreStore);
  7186     __ membar(Assembler::LoadStore|Assembler::StoreStore);
       
  7187   %}
       
  7188 
       
  7189   ins_pipe(pipe_serial);
       
  7190 %}
       
  7191 
       
  7192 instruct unnecessary_membar_volatile() %{
       
  7193   predicate(unnecessary_volatile(n));
       
  7194   match(MemBarVolatile);
       
  7195   ins_cost(0);
       
  7196 
       
  7197   format %{ "membar_volatile (elided)" %}
       
  7198 
       
  7199   ins_encode %{
       
  7200     __ block_comment("membar_volatile (elided)");
  6380   %}
  7201   %}
  6381 
  7202 
  6382   ins_pipe(pipe_serial);
  7203   ins_pipe(pipe_serial);
  6383 %}
  7204 %}
  6384 
  7205
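Taken together, needs_releasing_store(), unnecessary_release() and unnecessary_volatile() have to agree for any given volatile write: either the store is emitted as stlr<x> and both surrounding barriers are elided, or the store stays a plain str<x> and both dmb ish barriers are emitted. A hypothetical consistency check expressing that invariant (not code from this file, and the node arguments are assumed to come from one volatile-write signature) would be:

    void check_volatile_write_predicates_agree(const Node *release_membar,
                                               const Node *store,
                                               const Node *volatile_membar)
    {
      bool use_stlr = needs_releasing_store(store);
      // eliding either barrier without emitting stlr<x>, or emitting stlr<x>
      // while keeping the barriers, would lose or duplicate the ordering
      assert(unnecessary_release(release_membar) == use_stlr, "must agree");
      assert(unnecessary_volatile(volatile_membar) == use_stlr, "must agree");
    }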