    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};

// graph traversal helpers
MemBarNode *has_parent_membar(const Node *n,
                              ProjNode *&ctl, ProjNode *&mem);
MemBarNode *has_child_membar(const MemBarNode *n,
                             ProjNode *&ctl, ProjNode *&mem);

// predicates controlling emit of ldr<x>/ldar<x> and associated dmb
bool unnecessary_acquire(const Node *barrier);
bool needs_acquiring_load(const Node *load);

// predicates controlling emit of str<x>/stlr<x> and associated dmbs
bool unnecessary_release(const Node *barrier);
bool unnecessary_volatile(const Node *barrier);
bool needs_releasing_store(const Node *store);

// Use barrier instructions rather than load acquire / store
// release.
const bool UseBarriersForVolatile = false;
// Use barrier instructions for unsafe volatile gets rather than
// trying to identify an exact signature for them
const bool UseBarriersForUnsafeVolatileGet = false;
%}

source %{

// AArch64 has ldar<x> and stlr<x> instructions which we can safely
// use to implement volatile reads and writes. For a volatile read
// we simply need
//
//   ldar<x>
//
// and for a volatile write we need
//
//   stlr<x>
//
// Alternatively, we can implement them by pairing a normal
// load/store with a memory barrier. For a volatile read we need
//
//   ldr<x>
//   dmb ishld
//
// for a volatile write
//
//   dmb ish
//   str<x>
//   dmb ish
//
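// As a purely illustrative example (the register assignments and the
// field are hypothetical, not taken from this file): with the address
// of a Java field declared 'volatile int f' in x1, a read of f can be
// compiled either as
//
//   ldr  w0, [x1]
//   dmb  ishld
//
// or as the single instruction
//
//   ldar w0, [x1]
//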
// In order to generate the desired instruction sequence we need to
// be able to identify specific 'signature' ideal graph node
// sequences which i) occur as a translation of a volatile read or
// write and ii) do not occur through any other translation or
// graph transformation. We can then provide alternative adlc
// matching rules which translate these node sequences to the
// desired machine code sequences. Selection of the alternative
// rules can be implemented by predicates which identify the
// relevant node sequences.
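//
// For instance, an alternative rule for an acquiring int load might
// have the following shape (a sketch only -- the rule name, operands,
// format and encoding details are illustrative assumptions, not the
// actual rules from this file):
//
//   instruct loadI_acq(iRegINoSp dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     // only selected when the load matches the volatile signature
//     predicate(needs_acquiring_load(n));
//     format %{ "ldar   $dst, $mem" %}
//     ...
//   %}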
//
// The ideal graph generator translates a volatile read to the node
// sequence
//
//   LoadX[mo_acquire]
//   MemBarAcquire
//
// As a special case when using the compressed oops optimization we
// may also see this variant
//
//   LoadN[mo_acquire]
//   DecodeN
//   MemBarAcquire
//
// A volatile write is translated to the node sequence
//
//   MemBarRelease
//   StoreX[mo_release]
//   MemBarVolatile
//
// n.b. the above node patterns are generated with a strict
// 'signature' configuration of input and output dependencies (see
// the predicates below for exact details). The two signatures are
// unique to translated volatile reads/stores -- they will not
// appear as a result of any other bytecode translation or inlining
// nor as a consequence of optimizing transforms.
//
// We also want to catch inlined unsafe volatile gets and puts and
// be able to implement them using either ldar<x>/stlr<x> or some
// combination of ldr<x>/str<x> and dmb instructions.
//
// Inlined unsafe volatile puts manifest as a minor variant of the
// normal volatile put node sequence containing an extra cpuorder
// membar
//
//   MemBarRelease
//   MemBarCPUOrder
//   StoreX[mo_release]
//   MemBarVolatile
//
// n.b. as an aside, the cpuorder membar is not itself subject to
// matching and translation by adlc rules. However, the rule
// predicates need to detect its presence in order to correctly
// select the desired adlc rules.
//
// Inlined unsafe volatile gets manifest as a somewhat different
// node sequence to a normal volatile get
//
//   MemBarCPUOrder
//        ||       \\
//   MemBarAcquire  LoadX[mo_acquire]
//        ||
//   MemBarCPUOrder
//
// In this case the acquire membar does not directly depend on the
// load. However, we can be sure that the load is generated from an
// inlined unsafe volatile get if we see it dependent on this unique
// sequence of membar nodes. Similarly, given an acquire membar we
// can know that it was added because of an inlined unsafe volatile
// get if it is fed and feeds a cpuorder membar and if its feed
// membar also feeds an acquiring load.
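//
// For example (illustrative only -- the receiver and offset
// variables are hypothetical), an inlined call such as
//
//   int v = unsafe.getIntVolatile(obj, offset);
//
// is the kind of access that gives rise to the membar-bracketed
// LoadX[mo_acquire] shown above.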
//
// So, where we can identify these volatile read and write
// signatures we can choose to plant either of the above two code
// sequences. For a volatile read we can simply plant a normal
// ldr<x> and translate the MemBarAcquire to a dmb. However, we can
// also choose to inhibit translation of the MemBarAcquire and
// inhibit planting of the ldr<x>, instead planting an ldar<x>.
//
// When we recognise a volatile store signature we can choose to
// plant a dmb ish as a translation for the MemBarRelease, a
// normal str<x> and then a dmb ish for the MemBarVolatile.
// Alternatively, we can inhibit translation of the MemBarRelease
// and MemBarVolatile and instead plant a simple stlr<x>
// instruction.
//
// Of course, the above only applies when we see these signature
// configurations. We still want to plant dmb instructions in any
// other cases where we may see a MemBarAcquire, MemBarRelease or
// MemBarVolatile. For example, at the end of a constructor which
// writes final/volatile fields we will see a MemBarRelease
// instruction and this needs a 'dmb ish' lest we risk the
// constructed object becoming visible without its final/volatile
// field writes being visible.
//
// n.b. the translation rules below which rely on detection of the
// volatile signatures and insert ldar<x> or stlr<x> are failsafe.
// If we see anything other than the signature configurations we
// always just translate the loads and stores to ldr<x> and str<x>
// and translate acquire, release and volatile membars to the
// relevant dmb instructions.
//
// n.b.b. as a case in point for the above comment, the current
// predicates don't detect the precise signature for certain types
// of volatile object stores (where the heap_base input type is not
// known at compile-time to be non-NULL). In those cases the
// MemBarRelease and MemBarVolatile bracket an if-then-else sequence
// with a store in each branch (we need a different store depending
// on whether heap_base is actually NULL). In such a case we will
// just plant a dmb both before and after the branch/merge. The
// predicate could (and probably should) be fixed later to also
// detect this case.

// graph traversal helpers

// if node n is linked to a parent MemBarNode by intervening Control
// and Memory ProjNodes return the MemBarNode otherwise return
// NULL.
//
// n may only be a Load or a MemBar.
//
// The ProjNode* references c and m are used to return the relevant
// nodes.

MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m)
{
  Node *ctl = NULL;
  Node *mem = NULL;
  Node *membar = NULL;

  if (n->is_Load()) {
    ctl = n->lookup(LoadNode::Control);
    mem = n->lookup(LoadNode::Memory);
  } else if (n->is_MemBar()) {
    ctl = n->lookup(TypeFunc::Control);
    mem = n->lookup(TypeFunc::Memory);
  } else {
    return NULL;
  }

  if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
    return NULL;

  c = ctl->as_Proj();

  membar = ctl->lookup(0);

  if (!membar || !membar->is_MemBar())
    return NULL;

  m = mem->as_Proj();

  if (mem->lookup(0) != membar)
    return NULL;

  return membar->as_MemBar();
}

// if n is linked to a child MemBarNode by intervening Control and
// Memory ProjNodes return the MemBarNode otherwise return NULL.
//
// The ProjNode* references c and m are used to return the relevant
// nodes.

MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m)
{
  ProjNode *ctl = n->proj_out(TypeFunc::Control);
  ProjNode *mem = n->proj_out(TypeFunc::Memory);

  // MemBar needs to have both a Ctl and Mem projection
  if (! ctl || ! mem)
    return NULL;

  c = ctl;
  m = mem;

  MemBarNode *child = NULL;
  Node *x;

  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a membar we keep hold of it. we may also see a new
    // arena copy of the original but it will appear later
    if (x->is_MemBar()) {
      child = x->as_MemBar();
      break;
    }
  }

  if (child == NULL)
    return NULL;

  // the membar found on the Ctl projection only qualifies if the
  // Mem projection also outputs to it
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x == child) {
      return child;
    }
  }
  return NULL;
}
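
// n.b. a typical use, as in the predicates below, chains these
// helpers to walk a membar sequence, e.g.
//
//   ProjNode *ctl;
//   ProjNode *mem;
//   MemBarNode *parent = has_parent_membar(ld, ctl, mem);
//   if (parent && parent->Opcode() == Op_MemBarCPUOrder) { ... }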

// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

bool unnecessary_acquire(const Node *barrier) {
  // assert barrier->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar. n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr())
      x = x->in(1);

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // only continue if we want to try to match unsafe volatile gets
  if (UseBarriersForUnsafeVolatileGet)
    return false;

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = has_parent_membar(barrier, ctl, mem);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (! ld || ! ld->is_acquire())
    return false;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    // if we see the same load we drop it and stop searching
    if (x == ld) {
      ld = NULL;
      break;
    }
  }
  // we must have dropped the load
  if (ld)
    return false;
  // check for a child cpuorder membar
  MemBarNode *child = has_child_membar(barrier->as_MemBar(), ctl, mem);
  if (!child || child->Opcode() != Op_MemBarCPUOrder)
    return false;

  return true;
}

bool needs_acquiring_load(const Node *n)
{
  // assert n->is_Load();
  if (UseBarriersForVolatile)
    // we use a normal load and a dmb
    return false;

  LoadNode *ld = n->as_Load();

  if (!ld->is_acquire())
    return false;

  // check if this load is feeding an acquire membar
  //
  //   LoadX[mo_acquire]
  //   {  |1   }
  //   {DecodeN}
  //      |Parms
  //   MemBarAcquire*
  //
  // where * tags node we were passed
  // and |k means input k

  Node *start = ld;
  Node *mbacq = NULL;

  // if we hit a DecodeNarrowPtr we reset the start node and restart
  // the search through the outputs
 restart:

  for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
    Node *x = start->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
      mbacq = x;
    } else if (!mbacq &&
               (x->is_DecodeNarrowPtr() ||
                (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
      start = x;
      goto restart;
    }
  }

  if (mbacq) {
    return true;
  }

  // only continue if we want to try to match unsafe volatile gets
  if (UseBarriersForUnsafeVolatileGet)
    return false;

  // check if the Ctl and Mem feeds come from a MemBarCPUOrder
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder

  MemBarNode *membar;
  ProjNode *ctl;
  ProjNode *mem;

  membar = has_parent_membar(ld, ctl, mem);

  if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
    return false;

  // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain

  membar = has_child_membar(membar, ctl, mem);

  if (!membar || membar->Opcode() != Op_MemBarAcquire)
    return false;

  membar = has_child_membar(membar, ctl, mem);

  if (!membar || membar->Opcode() != Op_MemBarCPUOrder)
    return false;

  return true;
}

bool unnecessary_release(const Node *n) {
  // assert n->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // ok, so we can omit this release barrier if it has been inserted
  // as part of a volatile store sequence
  //
  //   MemBarRelease
  //  {      ||      }
  //  {MemBarCPUOrder} -- optional
  //         ||     \\
  //         ||   StoreX[mo_release]
  //         | \   /
  //         | MergeMem
  //         | /
  //   MemBarVolatile
  //
  // where
  //   || and \\ represent Ctl and Mem feeds via Proj nodes
  //   | \ and / indicate further routing of the Ctl and Mem feeds
  //
  // so we need to check that
  //
  // i) the release membar (or its dependent cpuorder membar) feeds
  // control to a store node (via a Control project node)
  //
  // ii) the store is ordered release
  //
  // iii) the release membar (or its dependent cpuorder membar) feeds
  // control to a volatile membar (via the same Control project node)
  //
  // iv) the release membar feeds memory to a merge mem and to the
  // same store (both via a single Memory proj node)
  //
  // v) the store outputs to the merge mem
  //
  // vi) the merge mem outputs to the same volatile membar
  //
  // n.b. if this is an inlined unsafe node then the release membar
  // may feed its control and memory links via an intervening cpuorder
  // membar. this case can be dealt with when we check the release
  // membar projections. if they both feed a single cpuorder membar
  // node continue to make the same checks as above but with the
  // cpuorder membar substituted for the release membar. if they don't
  // both feed a cpuorder membar then the check fails.
  //
  // n.b.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that but for now we can
  // just take the hit of inserting a redundant dmb for this
  // redundant volatile membar

  MemBarNode *barrier = n->as_MemBar();
  ProjNode *ctl;
  ProjNode *mem;
  // check for an intervening cpuorder membar
  MemBarNode *b = has_child_membar(barrier, ctl, mem);
  if (b && b->Opcode() == Op_MemBarCPUOrder) {
    // ok, so start from the dependent cpuorder barrier
    barrier = b;
  }
  // check the ctl and mem flow
  ctl = barrier->proj_out(TypeFunc::Control);
  mem = barrier->proj_out(TypeFunc::Memory);

  // the barrier needs to have both a Ctl and Mem projection
  if (! ctl || ! mem)
    return false;

  Node *x = NULL;
  Node *mbvol = NULL;
  StoreNode * st = NULL;

  // For a normal volatile write the Ctl ProjNode should have output
  // to a MemBarVolatile and a Store marked as releasing
  //
  // n.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that case too but for now
  // we can just take the hit of inserting a dmb and a non-volatile
  // store to implement the volatile store

  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
      if (mbvol) {
        return false;
      }
      mbvol = x;
    } else if (x->is_Store()) {
      st = x->as_Store();
      if (! st->is_release()) {
        return false;
      }
    } else if (!x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mbvol || !st)
    return false;

  // the Mem ProjNode should output to a MergeMem and the same Store
  Node *mm = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (!mm && x->is_MergeMem()) {
      mm = x;
    } else if (x != st && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mm)
    return false;

  // the MergeMem should output to the MemBarVolatile
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x != mbvol && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  return true;
}

bool unnecessary_volatile(const Node *n) {
  // assert n->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // ok, so we can omit this volatile barrier if it has been inserted
  // as part of a volatile store sequence
  //
  //   MemBarRelease
  //  {      ||      }
  //  {MemBarCPUOrder} -- optional
  //         ||     \\
  //         ||   StoreX[mo_release]
  //         | \   /
  //         | MergeMem
  //         | /
  //   MemBarVolatile
  //
  // where
  //   || and \\ represent Ctl and Mem feeds via Proj nodes
  //   | \ and / indicate further routing of the Ctl and Mem feeds
  //
  // we need to check that
  //
  // i) the volatile membar gets its control feed from a release
  // membar (or its dependent cpuorder membar) via a Control project
  // node
  //
  // ii) the release membar (or its dependent cpuorder membar) also
  // feeds control to a store node via the same proj node
  //
  // iii) the store is ordered release
  //
  // iv) the release membar (or its dependent cpuorder membar) feeds
  // memory to a merge mem and to the same store (both via a single
  // Memory proj node)
  //
  // v) the store outputs to the merge mem
  //
  // vi) the merge mem outputs to the volatile membar
  //
  // n.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that but for now we can
  // just take the hit of inserting a redundant dmb for this
  // redundant volatile membar

  MemBarNode *mbvol = n->as_MemBar();
  Node *x = n->lookup(TypeFunc::Control);

  if (! x || !x->is_Proj())
    return false;

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  if (!x || !x->is_MemBar())
    return false;

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar we have what we want. if it is
  // a cpuorder membar then we need to ensure that it is fed by a
  // release membar in which case we proceed to check the graph below
  // this cpuorder membar as the feed

  if (x->Opcode() != Op_MemBarRelease) {
    if (x->Opcode() != Op_MemBarCPUOrder)
      return false;
    ProjNode *ctl;
    ProjNode *mem;
    MemBarNode *b = has_parent_membar(x, ctl, mem);
    if (!b || b->Opcode() != Op_MemBarRelease)
      return false;
  }

  ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
  ProjNode *mem = barrier->proj_out(TypeFunc::Memory);

  // barrier needs to have both a Ctl and Mem projection
  // and we need to have reached it via the Ctl projection
  if (! ctl || ! mem || ctl != proj)
    return false;

  StoreNode * st = NULL;

  // The Ctl ProjNode should have output to a MemBarVolatile and
  // a Store marked as releasing
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
      if (x != mbvol) {
        return false;
      }
    } else if (x->is_Store()) {
      st = x->as_Store();
      if (! st->is_release()) {
        return false;
      }
    } else if (!x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!st)
    return false;

  // the Mem ProjNode should output to a MergeMem and the same Store
  Node *mm = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (!mm && x->is_MergeMem()) {
      mm = x;
    } else if (x != st && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mm)
    return false;

  // the MergeMem should output to the MemBarVolatile
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x != mbvol && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  return true;
}


bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  if (UseBarriersForVolatile)
    // we use a normal store and dmb combination
    return false;

  StoreNode *st = n->as_Store();

  if (!st->is_release())
    return false;

  // check if this store is bracketed by a release (or its dependent
  // cpuorder membar) and a volatile membar
  //
  //   MemBarRelease
  //  {      ||      }
  //  {MemBarCPUOrder} -- optional
  //         ||     \\
  //         ||   StoreX[mo_release]
  //         | \   /
  //         | MergeMem
  //         | /
  //   MemBarVolatile
  //
  // where
  //   || and \\ represent Ctl and Mem feeds via Proj nodes
  //   | \ and / indicate further routing of the Ctl and Mem feeds

  Node *x = st->lookup(TypeFunc::Control);

  if (! x || !x->is_Proj())
    return false;

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  if (!x || !x->is_MemBar())
    return false;

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar we have what we want. if it is
  // a cpuorder membar then we need to ensure that it is fed by a
  // release membar in which case we proceed to check the graph below
  // this cpuorder membar as the feed

  if (x->Opcode() != Op_MemBarRelease) {
    if (x->Opcode() != Op_MemBarCPUOrder)
      return false;
    Node *ctl = x->lookup(TypeFunc::Control);
    Node *mem = x->lookup(TypeFunc::Memory);
    if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj())
      return false;
    x = ctl->lookup(0);
    if (!x || !x->is_MemBar() || x->Opcode() != Op_MemBarRelease)
      return false;
    Node *y = mem->lookup(0);
    if (!y || y != x)
      return false;
  }

  ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
  ProjNode *mem = barrier->proj_out(TypeFunc::Memory);

  // MemBarRelease needs to have both a Ctl and Mem projection
  // and we need to have reached it via the Ctl projection
  if (! ctl || ! mem || ctl != proj)
    return false;

  MemBarNode *mbvol = NULL;

  // The Ctl ProjNode should have output to a MemBarVolatile and
  // a Store marked as releasing
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
      mbvol = x->as_MemBar();
    } else if (x->is_Store()) {
      if (x != st) {
        return false;
      }
    } else if (!x->is_Mach()) {
      return false;
    }
  }

  if (!mbvol)
    return false;

  // the Mem ProjNode should output to a MergeMem and the same Store
  Node *mm = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (!mm && x->is_MergeMem()) {
      mm = x;
    } else if (x != st && !x->is_Mach()) {
      return false;
    }
  }

  if (!mm)
    return false;

  // the MergeMem should output to the MemBarVolatile
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x != mbvol && !x->is_Mach()) {
      return false;
    }
  }

  return true;
}

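// The predicates above are consumed by adlc matching rules. As a
// sketch (the rule name, cost and encoding here are illustrative
// assumptions, not copied from this file), eliding a trailing
// volatile membar looks like
//
//   instruct unnecessary_membar_volatile() %{
//     // matched only when the volatile store signature is present
//     predicate(unnecessary_volatile(n));
//     match(MemBarVolatile);
//     ins_cost(0);
//     format %{ "membar_volatile (elided)" %}
//     ins_encode %{ /* emit nothing */ %}
//     ...
//   %}
//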

#define __ _masm.

// advance declarations for helper functions to convert register
// indices to register objects