@@ -34,11 +34,11 @@
                                        Generation* old_gen_,
                                        int thread_num_,
                                        ObjToScanQueueSet* work_queue_set_,
                                        size_t desired_plab_sz_,
                                        ParallelTaskTerminator& term_) :
-  _to_space(to_space_), _old_gen(old_gen_), _thread_num(thread_num_),
+  _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
   _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
   _ageTable(false), // false ==> not the global age table, no perf data.
   _to_space_alloc_buffer(desired_plab_sz_),
   _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
   _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this),

@@ -79,11 +84,11 @@
 
 void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) {
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-  assert(!_old_gen->is_in(old), "must be in young generation.");
+  assert(!old_gen()->is_in(old), "must be in young generation.");
 
   objArrayOop obj = objArrayOop(old->forwardee());
   // Process ParGCArrayScanChunk elements now
   // and push the remainder back onto queue
   int start = arrayOop(old)->length();

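The chunking protocol this function relies on is spelled out in the comment block added further down: the length field of the from-space copy is reused as the "grey boundary", recording how far the to-space copy has already been scanned, so a large array is processed ParGCArrayScanChunk elements at a time while the remainder is requeued. A minimal self-contained sketch of that idea (assumed names such as Array and kChunk; not the HotSpot code):

#include <cstddef>
#include <deque>
#include <vector>

const int kChunk = 50;            // stands in for ParGCArrayScanChunk

struct Array {
  int scanned;                    // grey boundary: elements [0, scanned) are done
  std::vector<int> elems;         // stands in for the to-space copy
};

void scan_partial(Array* a, std::deque<Array*>& work_queue) {
  int start = a->scanned;         // cf. arrayOop(old)->length()
  int end   = static_cast<int>(a->elems.size());
  if (end - start > 2 * kChunk) {
    end = start + kChunk;         // take just one chunk now,
    a->scanned = end;             // advance the grey boundary,
    work_queue.push_back(a);      // and requeue the remainder.
  }
  for (int i = start; i < end; i++) {
    // apply the scan closure to a->elems[i]
  }
}
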
@@ -117,26 +122,34 @@
 }
 
 
 void ParScanThreadState::trim_queues(int max_size) {
   ObjToScanQueue* queue = work_queue();
-  while (queue->size() > (juint)max_size) {
-    oop obj_to_scan;
-    if (queue->pop_local(obj_to_scan)) {
-      note_pop();
-
-      if ((HeapWord *)obj_to_scan < young_old_boundary()) {
-        if (obj_to_scan->is_objArray() &&
-            obj_to_scan->is_forwarded() &&
-            obj_to_scan->forwardee() != obj_to_scan) {
-          scan_partial_array_and_push_remainder(obj_to_scan);
-        } else {
-          // object is in to_space
-          obj_to_scan->oop_iterate(&_to_space_closure);
-        }
-      } else {
-        // object is in old generation
-        obj_to_scan->oop_iterate(&_old_gen_closure);
-      }
-    }
-  }
+  do {
+    while (queue->size() > (juint)max_size) {
+      oop obj_to_scan;
+      if (queue->pop_local(obj_to_scan)) {
+        note_pop();
+        if ((HeapWord *)obj_to_scan < young_old_boundary()) {
+          if (obj_to_scan->is_objArray() &&
+              obj_to_scan->is_forwarded() &&
+              obj_to_scan->forwardee() != obj_to_scan) {
+            scan_partial_array_and_push_remainder(obj_to_scan);
+          } else {
+            // object is in to_space
+            obj_to_scan->oop_iterate(&_to_space_closure);
+          }
+        } else {
+          // object is in old generation
+          obj_to_scan->oop_iterate(&_old_gen_closure);
+        }
+      }
+    }
+    // For the case of compressed oops, we have a private, non-shared
+    // overflow stack, so we eagerly drain it so as to more evenly
+    // distribute load early. Note: this may be good to do in
+    // general rather than delay for the final stealing phase.
+    // If applicable, we'll transfer a set of objects over to our
+    // work queue, allowing them to be stolen and draining our
+    // private overflow stack.
+  } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this));
 }

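The restructured loop's intent, per the comment above: rather than letting the private overflow stack sit until the final stealing phase, each trim pass moves a batch back onto the stealable work queue and trims again. A rough stand-alone sketch of that shape (assumed simplified types, single-threaded; not the HotSpot code):

#include <algorithm>
#include <deque>
#include <vector>

struct Task {};

void trim_queues(std::deque<Task>& queue, std::vector<Task>& overflow,
                 size_t max_size) {
  bool refilled;
  do {
    while (queue.size() > max_size) {
      Task t = queue.back();          // cf. pop_local()
      queue.pop_back();
      (void)t;                        // scan t, possibly pushing new tasks
    }
    // cf. take_from_overflow_list(): move a bounded batch from the
    // private overflow stack onto the queue, where it can be stolen.
    size_t batch = std::min(overflow.size(), max_size / 4 + 1);
    refilled = batch > 0;
    for (size_t i = 0; i < batch; i++) {
      queue.push_back(overflow.back());
      overflow.pop_back();
    }
  } while (refilled);
}
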
@@ -142,0 +156,28 @@
+
+bool ParScanThreadState::take_from_overflow_stack() {
+  assert(UseCompressedOops, "Else should not call");
+  assert(young_gen()->overflow_list() == NULL, "Error");
+  ObjToScanQueue* queue = work_queue();
+  GrowableArray<oop>* of_stack = overflow_stack();
+  uint num_overflow_elems = of_stack->length();
+  uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4,
+                                  (juint)ParGCDesiredObjsFromOverflowList),
+                             num_overflow_elems);
+  // Transfer the most recent num_take_elems from the overflow
+  // stack to our work queue.
+  for (size_t i = 0; i != num_take_elems; i++) {
+    oop cur = of_stack->pop();
+    oop obj_to_push = cur->forwardee();
+    assert(Universe::heap()->is_in_reserved(cur), "Should be in heap");
+    assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
+    assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap");
+    if (should_be_partially_scanned(obj_to_push, cur)) {
+      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
+    }
+    bool ok = queue->push(obj_to_push);
+    assert(ok, "Should have succeeded");
+  }
+  assert(young_gen()->overflow_list() == NULL, "Error");
+  return num_take_elems > 0;  // was something transferred?
+}

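The take size above is the minimum of three quantities: a quarter of the queue's remaining capacity, the ParGCDesiredObjsFromOverflowList flag, and what the stack actually holds. A worked example with assumed numbers (the queue capacity and flag value here are illustrative, not quoted defaults):

#include <algorithm>
#include <cstdio>

int main() {
  unsigned max_elems          = 16384;  // queue->max_elems() (assumed)
  unsigned size               = 1000;   // queue->size()
  unsigned desired            = 20;     // ParGCDesiredObjsFromOverflowList (assumed)
  unsigned num_overflow_elems = 500;    // of_stack->length()

  unsigned num_take_elems =
      std::min(std::min((max_elems - size) / 4,  // spare capacity / 4 = 3846
                        desired),                // capped at the flag value
               num_overflow_elems);              // capped at what is available
  std::printf("take %u\n", num_take_elems);      // prints: take 20
  return 0;
}
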
@@ -143,4 +184,10 @@
+
+void ParScanThreadState::push_on_overflow_stack(oop p) {
+  assert(UseCompressedOops, "Else should not call");
+  overflow_stack()->push(p);
+  assert(young_gen()->overflow_list() == NULL, "Error");
+}
 
 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
 
   // Otherwise, if the object is small enough, try to reallocate the

@@ -1167,5 +1213,25 @@
       return false;
     }
   }
 #endif
 
+// In case we are using compressed oops, we need to be careful.
+// If the object being pushed is an object array, then its length
+// field keeps track of the "grey boundary" at which the next
+// incremental scan will be done (see ParGCArrayScanChunk).
+// When using compressed oops, this length field is kept in the
+// lower 32 bits of the erstwhile klass word and cannot be used
+// for the overflow chaining pointer (OCP below). As such the OCP
+// would itself need to be compressed into the top 32-bits in this
+// case. Unfortunately, see below, in the event that we have a
+// promotion failure, the node to be pushed on the list can be
+// outside of the Java heap, so the heap-based pointer compression
+// would not work (we would have potential aliasing between C-heap
+// and Java-heap pointers). For this reason, when using compressed
+// oops, we simply use a worker-thread-local, non-shared overflow
+// list in the form of a growable array, with a slightly different
+// overflow stack draining strategy. If/when we start using fat
+// stacks here, we can go back to using (fat) pointer chains
+// (although some performance comparisons would be useful since
+// single global lists have their own performance disadvantages
+// as we were made painfully aware not long ago, see 6786503).

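The aliasing problem this comment describes can be made concrete: heap-based compression encodes an oop as a scaled 32-bit offset from the heap base, so a C-heap node (such as the listhead allocated on the promotion-failure path below) has no valid encoding, and any attempt to encode it would decode to some unrelated heap word. A minimal sketch, assuming a 32 GB reservation and 8-byte-aligned oops (assumed constants; illustrative, not HotSpot's actual encoding code):

#include <cassert>
#include <cstdint>

static char*        heap_base;                     // reservation start (assumed)
static const size_t heap_size = size_t(32) << 30;  // 32 GB, 8-byte-aligned oops

uint32_t encode(const void* p) {
  size_t off = (size_t)((const char*)p - heap_base);
  // Holds for any oop inside the Java heap; fails for a C-heap
  // listhead, whose "offset" is an arbitrary value that would decode
  // to a legitimate heap address -- the aliasing mentioned above.
  assert(off < heap_size && (off & 7) == 0);
  return (uint32_t)(off >> 3);
}

const void* decode(uint32_t v) {
  return heap_base + ((size_t)v << 3);
}
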
@@ -1172,29 +1238,36 @@
 #define BUSY (oop(0x1aff1aff))
 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
-  // if the object has been forwarded to itself, then we cannot
-  // use the klass pointer for the linked list. Instead we have
-  // to allocate an oopDesc in the C-Heap and use that for the linked list.
-  // XXX This is horribly inefficient when a promotion failure occurs
-  // and should be fixed. XXX FIX ME !!!
+  assert(is_in_reserved(from_space_obj), "Should be from this generation");
+  if (UseCompressedOops) {
+    // In the case of compressed oops, we use a private, not-shared
+    // overflow stack.
+    par_scan_state->push_on_overflow_stack(from_space_obj);
+  } else {
+    // if the object has been forwarded to itself, then we cannot
+    // use the klass pointer for the linked list. Instead we have
+    // to allocate an oopDesc in the C-Heap and use that for the linked list.
+    // XXX This is horribly inefficient when a promotion failure occurs
+    // and should be fixed. XXX FIX ME !!!
 #ifndef PRODUCT
     Atomic::inc_ptr(&_num_par_pushes);
     assert(_num_par_pushes > 0, "Tautology");
 #endif
     if (from_space_obj->forwardee() == from_space_obj) {
       oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
       listhead->forward_to(from_space_obj);
       from_space_obj = listhead;
     }
     oop observed_overflow_list = _overflow_list;
     oop cur_overflow_list;
     do {
       cur_overflow_list = observed_overflow_list;
       if (cur_overflow_list != BUSY) {
         from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
       } else {
         from_space_obj->set_klass_to_list_ptr(NULL);
       }
       observed_overflow_list =
         (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
     } while (cur_overflow_list != observed_overflow_list);
   }
+}

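For readers more used to std::atomic than HotSpot's Atomic::, the push protocol in the else branch looks like this in isolation. The key point is the BUSY sentinel: a pusher never chains through it, but instead publishes a fresh one-element list and leaves it to the thread that planted BUSY to splice the pieces back together. (Assumed simplified node type; a sketch, not the real code.)

#include <atomic>

struct Node { Node* next; };
static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);
static std::atomic<Node*> overflow_list{nullptr};

void push_on_overflow_list(Node* n) {
  Node* observed = overflow_list.load();
  Node* cur;
  do {
    cur = observed;
    // Never chain through the BUSY sentinel: start a fresh list and
    // let whoever planted BUSY merge the two lists later.
    n->next = (cur == BUSY) ? nullptr : cur;
  } while (!overflow_list.compare_exchange_weak(observed, n));
}
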
@@ -1201,5 +1274,17 @@
+
+bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+  bool res;
+
+  if (UseCompressedOops) {
+    res = par_scan_state->take_from_overflow_stack();
+  } else {
+    res = take_from_overflow_list_work(par_scan_state);
+  }
+  return res;
+}
+
 
 // *NOTE*: The overflow list manipulation code here and
 // in CMSCollector:: are very similar in shape,
 // except that in the CMS case we thread the objects
 // directly into the list via their mark word, and do

@@ -1211,18 +1296,17 @@
 // Because of the common code, if you make any changes in
 // the code below, please check the CMS version to see if
 // similar changes might be needed.
 // See CMSCollector::par_take_from_overflow_list() for
 // more extensive documentation comments.
-bool
-ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
-  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+  size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                  (size_t)ParGCDesiredObjsFromOverflowList);
 
+  assert(par_scan_state->overflow_stack() == NULL, "Error");
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
   oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   // Trim off a prefix of at most objsFromOverflow items

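The claim step that begins here pairs with the push protocol shown earlier: the taker swaps BUSY into the list head, detaches a prefix of at most objsFromOverflow items for itself, and then merges whatever pushers published in the meantime with the unused suffix. A simplified sketch of that dance (assumed types; the real code's failure handling and splicing are more involved):

#include <atomic>
#include <cstddef>

struct Node { Node* next; };
static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);
static std::atomic<Node*> overflow_list{nullptr};

Node* take_prefix(size_t max_objs) {
  Node* prefix = overflow_list.exchange(BUSY);   // claim the whole list
  if (prefix == nullptr || prefix == BUSY) {
    return nullptr;            // list empty, or another taker holds it
  }
  // Detach at most max_objs nodes for local processing.
  Node* last = prefix;
  size_t n = 1;
  while (n < max_objs && last->next != nullptr) {
    last = last->next;
    n++;
  }
  Node* suffix = last->next;
  last->next = nullptr;
  // Put the suffix (possibly empty) back in place of BUSY.
  Node* expected = BUSY;
  if (overflow_list.compare_exchange_strong(expected, suffix)) {
    return prefix;             // nothing was pushed while we held BUSY
  }
  // Pushers replaced BUSY with a fresh list; splice our suffix in
  // front of it using the same loop shape as the push protocol.
  if (suffix != nullptr) {
    Node* tail = suffix;
    while (tail->next != nullptr) tail = tail->next;
    Node* observed = overflow_list.load();
    Node* head;
    do {
      head = observed;
      tail->next = (head == BUSY) ? nullptr : head;
    } while (!overflow_list.compare_exchange_weak(observed, suffix));
  }
  return prefix;
}
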