/*
 * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_matcher.cpp.incl"

OptoReg::Name OptoReg::c_frame_pointer;


const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0 /*bottom*/
};
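
// base2reg maps each Type::base() value to the ideal register class
// (Op_RegI, Op_RegL, Op_RegP, ...) used to carry a value of that type.
// Entries of 0 or Node::NotAMachineReg mark types -- control, memory,
// tuples and the like -- that never live in a machine register.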
|
const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
RegMask Matcher::mreg2regmask[_last_Mach_Reg];
RegMask Matcher::STACK_ONLY_mask;
RegMask Matcher::c_frame_ptr_mask;
const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;

//---------------------------Matcher-------------------------------------------
Matcher::Matcher( Node_List &proj_list ) :
  PhaseTransform( Phase::Ins_Select ),
#ifdef ASSERT
  _old2new_map(C->comp_arena()),
#endif
  _shared_constants(C->comp_arena()),
  _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
  _swallowed(swallowed),
  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
  _must_clone(must_clone), _proj_list(proj_list),
  _register_save_policy(register_save_policy),
  _c_reg_save_policy(c_reg_save_policy),
  _register_save_type(register_save_type),
  _ruleName(ruleName),
  _allocation_started(false),
  _states_arena(Chunk::medium_size),
  _visited(&_states_arena),
  _shared(&_states_arena),
  _dontcare(&_states_arena) {
  C->set_matcher(this);

  idealreg2spillmask[Op_RegI] = NULL;
  idealreg2spillmask[Op_RegN] = NULL;
  idealreg2spillmask[Op_RegL] = NULL;
  idealreg2spillmask[Op_RegF] = NULL;
  idealreg2spillmask[Op_RegD] = NULL;
  idealreg2spillmask[Op_RegP] = NULL;

  idealreg2debugmask[Op_RegI] = NULL;
  idealreg2debugmask[Op_RegN] = NULL;
  idealreg2debugmask[Op_RegL] = NULL;
  idealreg2debugmask[Op_RegF] = NULL;
  idealreg2debugmask[Op_RegD] = NULL;
  idealreg2debugmask[Op_RegP] = NULL;
}
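
// The idealreg2spillmask and idealreg2debugmask entries start out NULL;
// they are allocated and filled in by init_first_stack_mask(), once the
// frame layout (and hence the stack portion of each mask) is known.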
|
//------------------------------warp_incoming_stk_arg------------------------
// This warps a VMReg into an OptoReg::Name
OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
  OptoReg::Name warped;
  if( reg->is_stack() ) {       // Stack slot argument?
    warped = OptoReg::add(_old_SP, reg->reg2stack() );
    warped = OptoReg::add(warped, C->out_preserve_stack_slots());
    if( warped >= _in_arg_limit )
      _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
    if (!RegMask::can_represent(warped)) {
      // the compiler cannot represent this method's calling sequence
      C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
      return OptoReg::Bad;
    }
    return warped;
  }
  return OptoReg::as_OptoReg(reg);
}
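
// For example, an argument the calling convention reports as stack slot k
// is rebiased to OptoReg::Name(_old_SP + out_preserve_stack_slots + k), so
// incoming stack slots are numbered in the allocator's frame-relative
// scheme rather than the convention's raw slot numbering.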
|
//---------------------------compute_old_SP------------------------------------
OptoReg::Name Compile::compute_old_SP() {
  int fixed    = fixed_slots();
  int preserve = in_preserve_stack_slots();
  return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
}
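
// The result is expressed in stack slots and rounded up to the machine's
// stack alignment, so _old_SP always lands on an aligned slot boundary.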
|

#ifdef ASSERT
void Matcher::verify_new_nodes_only(Node* xroot) {
  // Make sure that the new graph only references new nodes
  ResourceMark rm;
  Unique_Node_List worklist;
  VectorSet visited(Thread::current()->resource_area());
  worklist.push(xroot);
  while (worklist.size() > 0) {
    Node* n = worklist.pop();
    visited <<= n->_idx;
    assert(C->node_arena()->contains(n), "dead node");
    for (uint j = 0; j < n->req(); j++) {
      Node* in = n->in(j);
      if (in != NULL) {
        assert(C->node_arena()->contains(in), "dead node");
        if (!visited.test(in->_idx)) {
          worklist.push(in);
        }
      }
    }
  }
}
#endif


//---------------------------match---------------------------------------------
void Matcher::match( ) {
  // One-time initialization of some register masks.
  init_spill_mask( C->root()->in(1) );
  _return_addr_mask = return_addr();
#ifdef _LP64
  // Pointers take 2 slots in 64-bit land
  _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
#endif

  // Map a Java-signature return type into return register-value
  // machine registers for 0, 1 and 2 returned values.
  const TypeTuple *range = C->tf()->range();
  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
    // Get ideal-register return type
    int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()];
    // Get machine return register
    uint sop = C->start()->Opcode();
    OptoRegPair regs = return_value(ireg, false);

    // And mask for same
    _return_value_mask = RegMask(regs.first());
    if( OptoReg::is_valid(regs.second()) )
      _return_value_mask.Insert(regs.second());
  }

  // ---------------
  // Frame Layout

  // Need the method signature to determine the incoming argument types,
  // because the types determine which registers the incoming arguments are
  // in, and this affects the matched code.
  const TypeTuple *domain = C->tf()->domain();
  uint argcnt = domain->cnt() - TypeFunc::Parms;
  BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
  VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
  _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
  _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
  uint i;
  for( i = 0; i<argcnt; i++ ) {
    sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
  }

  // Pass array of ideal registers and length to USER code (from the AD file)
  // that will convert this to an array of register numbers.
  const StartNode *start = C->start();
  start->calling_convention( sig_bt, vm_parm_regs, argcnt );
#ifdef ASSERT
  // Sanity check users' calling convention.  Real handy while trying to
  // get the initial port correct.
  { for (uint i = 0; i<argcnt; i++) {
      if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
        assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
        _parm_regs[i].set_bad();
        continue;
      }
      VMReg parm_reg = vm_parm_regs[i].first();
      assert(parm_reg->is_valid(), "invalid arg?");
      if (parm_reg->is_reg()) {
        OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
        assert(can_be_java_arg(opto_parm_reg) ||
               C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
               opto_parm_reg == inline_cache_reg(),
               "parameters in register must be preserved by runtime stubs");
      }
      for (uint j = 0; j < i; j++) {
        assert(parm_reg != vm_parm_regs[j].first(),
               "calling conv. must produce distinct regs");
      }
    }
  }
#endif

  // Do some initial frame layout.

  // Compute the old incoming SP (may be called FP) as
  //   OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
  _old_SP = C->compute_old_SP();
  assert( is_even(_old_SP), "must be even" );

  // Compute highest incoming stack argument as
  //   _old_SP + out_preserve_stack_slots + incoming argument size.
  _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  assert( is_even(_in_arg_limit), "out_preserve must be even" );
  for( i = 0; i < argcnt; i++ ) {
    // Permit args to have no register
    _calling_convention_mask[i].Clear();
    if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
      continue;
    }
    // calling_convention returns stack arguments as a count of
    // slots beyond OptoReg::stack0()/VMRegImpl::stack0.  We need to convert this to
    // the allocator's point of view, taking into account all the
    // preserve area, locks & pad2.

    OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
    if( OptoReg::is_valid(reg1))
      _calling_convention_mask[i].Insert(reg1);

    OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
    if( OptoReg::is_valid(reg2))
      _calling_convention_mask[i].Insert(reg2);

    // Saved biased stack-slot register number
    _parm_regs[i].set_pair(reg2, reg1);
  }

  // Finally, make sure the incoming arguments take up an even number of
  // words, in case the arguments or locals need to contain doubleword stack
  // slots.  The rest of the system assumes that stack slot pairs (in
  // particular, in the spill area) which look aligned will in fact be
  // aligned relative to the stack pointer in the target machine.  Double
  // stack slots will always be allocated aligned.
  _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));

  // Compute highest outgoing stack argument as
  //   _new_SP + out_preserve_stack_slots + max(outgoing argument size).
  _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(_out_arg_limit), "out_preserve must be even" );

  if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
    // the compiler cannot represent this method's calling sequence
    C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
  }

  if (C->failing())  return;  // bailed out on incoming arg failure

  // ---------------
  // Collect roots of matcher trees.  Every node for which
  // _shared[_idx] is cleared is guaranteed to not be shared, and thus
  // can be a valid interior of some tree.
  find_shared( C->root() );
  find_shared( C->top() );

  C->print_method("Before Matching", 2);

  // Swap out to old-space; emptying new-space
  Arena *old = C->node_arena()->move_contents(C->old_arena());

  // Save debug and profile information for nodes in old space:
  _old_node_note_array = C->node_note_array();
  if (_old_node_note_array != NULL) {
    C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
                           (C->comp_arena(), _old_node_note_array->length(),
                            0, NULL));
  }

  // Pre-size the new_node table to avoid the need for range checks.
  grow_new_node_array(C->unique());

  // Reset node counter so MachNodes start with _idx at 0
  int nodes = C->unique(); // save value
  C->set_unique(0);

  // Recursively match trees from old space into new space.
  // Correct leaves of new-space Nodes; they point to old-space.
  _visited.Clear();             // Clear visit bits for xform call
  C->set_cached_top_node(xform( C->top(), nodes ));
  if (!C->failing()) {
    Node* xroot = xform( C->root(), 1 );
    if (xroot == NULL) {
      Matcher::soft_match_failure();  // recursive matching process failed
      C->record_method_not_compilable("instruction match failed");
    } else {
      // During matching shared constants were attached to C->root()
      // because xroot wasn't available yet, so transfer the uses to
      // the xroot.
      for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
        Node* n = C->root()->fast_out(j);
        if (C->node_arena()->contains(n)) {
          assert(n->in(0) == C->root(), "should be control user");
          n->set_req(0, xroot);
          --j;
          --jmax;
        }
      }

      C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);
#ifdef ASSERT
      verify_new_nodes_only(xroot);
#endif
    }
  }
  if (C->top() == NULL || C->root() == NULL) {
    C->record_method_not_compilable("graph lost"); // %%% cannot happen?
  }
  if (C->failing()) {
    // delete old;
    old->destruct_contents();
    return;
  }
  assert( C->top(), "" );
  assert( C->root(), "" );
  validate_null_checks();

  // Now smoke old-space
  NOT_DEBUG( old->destruct_contents() );

  // ------------------------
  // Set up save-on-entry registers
  Fixup_Save_On_Entry( );
}
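
// In outline, match() computes the incoming/outgoing frame layout, marks
// shared nodes with find_shared(), moves the existing graph to old-space,
// rewrites it into machine nodes via xform(), and finishes by attaching
// save-on-entry bookkeeping in Fixup_Save_On_Entry().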
|

//------------------------------Fixup_Save_On_Entry----------------------------
// The stated purpose of this routine is to take care of save-on-entry
// registers.  However, the overall goal of the Match phase is to convert into
// machine-specific instructions which have RegMasks to guide allocation.
// So what this procedure really does is put a valid RegMask on each input
// to the machine-specific variations of all Return, TailCall and Halt
// instructions.  It also adds edges to define the save-on-entry values (and of
// course gives them a mask).

static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
  RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
  // Do all the pre-defined register masks
  rms[TypeFunc::Control  ] = RegMask::Empty;
  rms[TypeFunc::I_O      ] = RegMask::Empty;
  rms[TypeFunc::Memory   ] = RegMask::Empty;
  rms[TypeFunc::ReturnAdr] = ret_adr;
  rms[TypeFunc::FramePtr ] = fp;
  return rms;
}
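
// init_input_masks() builds the shared input-mask array used by each exit
// flavor below (Return, Rethrow, TailCall, TailJump, Halt): the control,
// I/O and memory edges get empty masks, while the return-address and
// frame-pointer edges get the masks passed in by the caller.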
|

//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
// initial mask accordingly.
void Matcher::init_first_stack_mask() {

  // Allocate storage for spill masks as masks for the appropriate load type.
  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask)*12);
  idealreg2spillmask[Op_RegN] = &rms[0];
  idealreg2spillmask[Op_RegI] = &rms[1];
  idealreg2spillmask[Op_RegL] = &rms[2];
  idealreg2spillmask[Op_RegF] = &rms[3];
  idealreg2spillmask[Op_RegD] = &rms[4];
  idealreg2spillmask[Op_RegP] = &rms[5];
  idealreg2debugmask[Op_RegN] = &rms[6];
  idealreg2debugmask[Op_RegI] = &rms[7];
  idealreg2debugmask[Op_RegL] = &rms[8];
  idealreg2debugmask[Op_RegF] = &rms[9];
  idealreg2debugmask[Op_RegD] = &rms[10];
  idealreg2debugmask[Op_RegP] = &rms[11];

  OptoReg::Name i;

  // At first, start with the empty mask
  C->FIRST_STACK_mask().Clear();

  // Add in the incoming argument area
  OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Add in all bits past the outgoing argument area
  guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
            "must be able to represent all call arguments in reg mask");
  init = _out_arg_limit;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Finally, set the "infinite stack" bit.
  C->FIRST_STACK_mask().set_AllStack();

  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
#ifdef _LP64
  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
   idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
#endif
  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
   idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
   idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
   idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
   idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());

  // Make up debug masks.  Any spill slot plus callee-save registers.
  // Caller-save registers are assumed to be trashable by the various
  // inline-cache fixup routines.
  *idealreg2debugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2debugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2debugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2debugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2debugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2debugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];

  // Prevent stub compilations from attempting to reference
  // callee-saved registers from debug info
  bool exclude_soe = !Compile::current()->is_method_compilation();

  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // registers the caller has to save do not work
    if( _register_save_policy[i] == 'C' ||
        _register_save_policy[i] == 'A' ||
        (_register_save_policy[i] == 'E' && exclude_soe) ) {
      idealreg2debugmask[Op_RegN]->Remove(i);
      idealreg2debugmask[Op_RegI]->Remove(i); // Exclude save-on-call
      idealreg2debugmask[Op_RegL]->Remove(i); // registers from debug
      idealreg2debugmask[Op_RegF]->Remove(i); // masks
      idealreg2debugmask[Op_RegD]->Remove(i);
      idealreg2debugmask[Op_RegP]->Remove(i);
    }
  }
}
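
// Net effect: idealreg2spillmask[k] is "any register of class k or any
// legal stack slot", while idealreg2debugmask[k] is the same minus every
// register the caller may clobber, so debug info never names a location
// a call could destroy.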
|

//---------------------------is_save_on_entry----------------------------------
bool Matcher::is_save_on_entry( int reg ) {
  return
    _register_save_policy[reg] == 'E' ||
    _register_save_policy[reg] == 'A' || // Save-on-entry register?
    // Also save argument registers in the trampolining stubs
    (C->save_argument_registers() && is_spillable_arg(reg));
}

//---------------------------Fixup_Save_On_Entry-------------------------------
void Matcher::Fixup_Save_On_Entry( ) {
  init_first_stack_mask();

  Node *root = C->root();       // Short name for root
  // Count number of save-on-entry registers.
  uint soe_cnt = number_of_saved_registers();
  uint i;

  // Find the procedure Start Node
  StartNode *start = C->start();
  assert( start, "Expect a start node" );

  // Save argument registers in the trampolining stubs
  if( C->save_argument_registers() )
    for( i = 0; i < _last_Mach_Reg; i++ )
      if( is_spillable_arg(i) )
        soe_cnt++;

  // Input RegMask array shared by all Returns.
  // The type for doubles and longs has a count of 2, but
  // there is only 1 returned value
  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
  RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Returns have 0 or 1 returned values depending on call signature.
  // Return register is specified by return_value in the AD file.
  if (ret_edge_cnt > TypeFunc::Parms)
    ret_rms[TypeFunc::Parms+0] = _return_value_mask;

  // Input RegMask array shared by all Rethrows.
  uint reth_edge_cnt = TypeFunc::Parms+1;
  RegMask *reth_rms  = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Rethrow takes exception oop only, but in the argument 0 slot.
  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
#ifdef _LP64
  // Need two slots for ptrs in 64-bit land
  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
#endif

  // Input RegMask array shared by all TailCalls
  uint tail_call_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Input RegMask array shared by all TailJumps
  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // TailCalls have 2 returned values (target & moop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailCall to extract these masks and put the correct masks into
  // the tail_call_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailCall ) {
      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // TailJumps have 2 returned values (target & ex_oop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailJump to extract these masks and put the correct masks into
  // the tail_jump_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailJump ) {
      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // Input RegMask array shared by all Halts
  uint halt_edge_cnt = TypeFunc::Parms;
  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Capture the return input masks into each exit flavor
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *exit = root->in(i)->as_MachReturn();
    switch( exit->ideal_Opcode() ) {
    case Op_Return   : exit->_in_rms = ret_rms;  break;
    case Op_Rethrow  : exit->_in_rms = reth_rms; break;
    case Op_TailCall : exit->_in_rms = tail_call_rms; break;
    case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
    case Op_Halt     : exit->_in_rms = halt_rms; break;
    default          : ShouldNotReachHere();
    }
  }

  // Next unused projection number from Start.
  int proj_cnt = C->tf()->domain()->cnt();

  // Do all the save-on-entry registers.  Make projections from Start for
  // them, and give them a use at the exit points.  To the allocator, they
  // look like incoming register arguments.
  for( i = 0; i < _last_Mach_Reg; i++ ) {
    if( is_save_on_entry(i) ) {

      // Add the save-on-entry to the mask array
      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
      // Halts need the SOE registers, but only in the stack as debug info.
      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];

      Node *mproj;

      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
      // into a single RegD.
      if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegF &&
          _register_save_type[i+1] == Op_RegF &&
          is_save_on_entry(i+1) ) {
        // Add other bit for double
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
        proj_cnt += 2;          // Skip 2 for doubles
      }
      else if( (i&1) == 1 &&    // Else check for high half of double
               _register_save_type[i-1] == Op_RegF &&
               _register_save_type[i  ] == Op_RegF &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      }
      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
      // into a single RegL.
      else if( (i&1) == 0 &&
               _register_save_type[i  ] == Op_RegI &&
               _register_save_type[i+1] == Op_RegI &&
               is_save_on_entry(i+1) ) {
        // Add other bit for long
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
        proj_cnt += 2;          // Skip 2 for longs
      }
      else if( (i&1) == 1 &&    // Else check for high half of long
               _register_save_type[i-1] == Op_RegI &&
               _register_save_type[i  ] == Op_RegI &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      } else {
        // Make a projection for it off the Start
        mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
      }

      ret_edge_cnt ++;
      reth_edge_cnt ++;
      tail_call_edge_cnt ++;
      tail_jump_edge_cnt ++;
      halt_edge_cnt ++;

      // Add a use of the SOE register to all exit paths
      for( uint j=1; j < root->req(); j++ )
        root->in(j)->add_req(mproj);
    } // End of if a save-on-entry register
  } // End of for all machine registers
}
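
// Each save-on-entry register thus becomes a MachProj def hanging off the
// Start node plus one extra input on every exit node, so the register
// allocator preserves it exactly as if it were an incoming argument
// threaded through the whole method.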
|

//------------------------------init_spill_mask--------------------------------
void Matcher::init_spill_mask( Node *ret ) {
  if( idealreg2regmask[Op_RegI] ) return; // One time only init

  OptoReg::c_frame_pointer = c_frame_pointer();
  c_frame_ptr_mask = c_frame_pointer();
#ifdef _LP64
  // pointers are twice as big
  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
#endif

  // Start at OptoReg::stack0()
  STACK_ONLY_mask.Clear();
  OptoReg::Name init = OptoReg::stack2reg(0);
  // STACK_ONLY_mask is all stack bits
  OptoReg::Name i;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    STACK_ONLY_mask.Insert(i);
  // Also set the "infinite stack" bit.
  STACK_ONLY_mask.set_AllStack();

  // Copy the register names over into the shared world
  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // SharedInfo::regName[i] = regName[i];
    // Handy RegMasks per machine register
    mreg2regmask[i].Insert(i);
  }

  // Grab the Frame Pointer
  Node *fp  = ret->in(TypeFunc::FramePtr);
  Node *mem = ret->in(TypeFunc::Memory);
  const TypePtr* atp = TypePtr::BOTTOM;
  // Share frame pointer while making spill ops
  set_shared(fp);

  // Compute generic short-offset Loads
#ifdef _LP64
  MachNode *spillCP = match_tree(new (C, 3) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
#endif
  MachNode *spillI  = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
  MachNode *spillL  = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
  MachNode *spillF  = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
  MachNode *spillD  = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
  MachNode *spillP  = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
         spillD != NULL && spillP != NULL, "");

  // Get the ADLC notion of the right regmask, for each basic type.
#ifdef _LP64
  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
#endif
  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
}
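
// Rather than hard-coding per-class register masks, the matcher asks the
// AD-generated code itself: it matches a dummy load of each basic type and
// reads the resulting MachNode's out_RegMask(), so idealreg2regmask always
// agrees with whatever the platform's .ad file specifies.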
|

#ifdef ASSERT
static void match_alias_type(Compile* C, Node* n, Node* m) {
  if (!VerifyAliases)  return;  // do not go looking for trouble by default
  const TypePtr* nat = n->adr_type();
  const TypePtr* mat = m->adr_type();
  int nidx = C->get_alias_index(nat);
  int midx = C->get_alias_index(mat);
  // Detune the assert for cases like (AndI 0xFF (LoadB p)).
  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
    for (uint i = 1; i < n->req(); i++) {
      Node* n1 = n->in(i);
      const TypePtr* n1at = n1->adr_type();
      if (n1at != NULL) {
        nat = n1at;
        nidx = C->get_alias_index(n1at);
      }
    }
  }
  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
    switch (n->Opcode()) {
    case Op_PrefetchRead:
    case Op_PrefetchWrite:
      nidx = Compile::AliasIdxRaw;
      nat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_ClearArray:
      midx = Compile::AliasIdxRaw;
      mat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
    switch (n->Opcode()) {
    case Op_Return:
    case Op_Rethrow:
    case Op_Halt:
    case Op_TailCall:
    case Op_TailJump:
      nidx = Compile::AliasIdxBot;
      nat = TypePtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_StrComp:
    case Op_MemBarVolatile:
    case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
      nidx = Compile::AliasIdxTop;
      nat = NULL;
      break;
    }
  }
  if (nidx != midx) {
    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
      n->dump();
      m->dump();
    }
    assert(C->subsume_loads() && C->must_alias(nat, midx),
           "must not lose alias info when matching");
  }
}
#endif


//------------------------------MStack-----------------------------------------
// State and MStack class used in xform() and find_shared() iterative methods.
enum Node_State { Pre_Visit,  // node has to be pre-visited
                  Visit,  // visit node
                  Post_Visit,  // post-visit node
                  Alt_Post_Visit   // alternative post-visit path
                };

class MStack: public Node_Stack {
  public:
    MStack(int size) : Node_Stack(size) { }

    void push(Node *n, Node_State ns) {
      Node_Stack::push(n, (uint)ns);
    }
    void push(Node *n, Node_State ns, Node *parent, int indx) {
      ++_inode_top;
      if ((_inode_top + 1) >= _inode_max) grow();
      _inode_top->node = parent;
      _inode_top->indx = (uint)indx;
      ++_inode_top;
      _inode_top->node = n;
      _inode_top->indx = (uint)ns;
    }
    Node *parent() {
      pop();
      return node();
    }
    Node_State state() const {
      return (Node_State)index();
    }
    void set_state(Node_State ns) {
      set_index((uint)ns);
    }
};
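
// MStack's four-argument push() stores two entries at once -- a
// (parent, input-index) record followed by the (child, state) record --
// so that parent() can pop the child and land directly on the slot that
// must be fixed up when the child's post-visit runs.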
|

//------------------------------xform------------------------------------------
// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
// Node in new-space.  Given a new-space Node, recursively walk his children.
Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
Node *Matcher::xform( Node *n, int max_stack ) {
  // Use one stack to keep both: child's node/state and parent's node/index
  MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root

  while (mstack.is_nonempty()) {
    n = mstack.node();          // Leave node on stack
    Node_State nstate = mstack.state();
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      Node *oldn = n;
      // Old-space or new-space check
      if (!C->node_arena()->contains(n)) {
        // Old space!
        Node* m;
        if (has_new_node(n)) {  // Not yet Label/Reduced
          m = new_node(n);
        } else {
          if (!is_dontcare(n)) { // Matcher can match this guy
            // Calls match special.  They match alone with no children.
            // Their children, the incoming arguments, match normally.
            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
            if (C->failing())  return NULL;
            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
          } else {                  // Nothing the matcher cares about
            if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
              // Convert to machine-dependent projection
              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
              if (m->in(0) != NULL) // m might be top
                collect_null_checks(m);
            } else {                // Else just a regular 'ol guy
              m = n->clone();       // So just clone into new-space
              // Def-Use edges will be added incrementally as Uses
              // of this node are matched.
              assert(m->outcnt() == 0, "no Uses of this clone yet");
            }
          }

          set_new_node(n, m);       // Map old to new
          if (_old_node_note_array != NULL) {
            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
                                                  n->_idx);
            C->set_node_notes_at(m->_idx, nn);
          }
          debug_only(match_alias_type(C, n, m));
        }
        n = m;    // n is now a new-space node
        mstack.set_node(n);
      }

      // New space!
      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())

      int i;
      // Put precedence edges on stack first (match them last).
      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
        Node *m = oldn->in(i);
        if (m == NULL) break;
        // set -1 to call add_prec() instead of set_req() during Step1
        mstack.push(m, Visit, n, -1);
      }

      // For constant debug info, I'd rather have unmatched constants.
      int cnt = n->req();
      JVMState* jvms = n->jvms();
      int debug_cnt = jvms ? jvms->debug_start() : cnt;

      // Now do only debug info.  Clone constants rather than matching.
      // Constants are represented directly in the debug info without
      // the need for executable machine instructions.
      // Monitor boxes are also represented directly.
      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
        Node *m = n->in(i);          // Get input
        int op = m->Opcode();
        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
        if( op == Op_ConI || op == Op_ConP ||
            op == Op_ConF || op == Op_ConD || op == Op_ConL
            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
            ) {
          m = m->clone();
          mstack.push(m, Post_Visit, n, i); // Don't need to visit
          mstack.push(m->in(0), Visit, m, 0);
        } else {
          mstack.push(m, Visit, n, i);
        }
      }

      // And now walk his children, and convert his inputs to new-space.
      for( ; i >= 0; --i ) { // For all normal inputs do
        Node *m = n->in(i);  // Get input
        if(m != NULL)
          mstack.push(m, Visit, n, i);
      }

    }
    else if (nstate == Post_Visit) {
      // Set xformed input
      Node *p = mstack.parent();
      if (p != NULL) { // root doesn't have parent
        int i = (int)mstack.index();
        if (i >= 0)
          p->set_req(i, n); // required input
        else if (i == -1)
          p->add_prec(n);   // precedence input
        else
          ShouldNotReachHere();
      }
      mstack.pop(); // remove processed node from stack
    }
    else {
      ShouldNotReachHere();
    }
  } // while (mstack.is_nonempty())
  return n; // Return new-space Node
}
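
// xform() is the iterative replacement for what would naturally be a
// recursive walk: the explicit MStack (sized two entries per visit, hence
// the * 2 * 2) avoids native-stack overflow on very large graphs, with the
// Visit/Post_Visit states playing the roles of the pre- and post-order
// halves of the recursion.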
|

//------------------------------warp_outgoing_stk_arg------------------------
OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
  // Convert outgoing argument location to a pre-biased stack offset
  if (reg->is_stack()) {
    OptoReg::Name warped = reg->reg2stack();
    // Adjust the stack slot offset to be the register number used
    // by the allocator.
    warped = OptoReg::add(begin_out_arg_area, warped);
    // Keep track of the largest numbered stack slot used for an arg.
    // Largest used slot per call-site indicates the amount of stack
    // that is killed by the call.
    if( warped >= out_arg_limit_per_call )
      out_arg_limit_per_call = OptoReg::add(warped,1);
    if (!RegMask::can_represent(warped)) {
      C->record_method_not_compilable_all_tiers("unsupported calling sequence");
      return OptoReg::Bad;
    }
    return warped;
  }
  return OptoReg::as_OptoReg(reg);
}
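
// The outgoing twin of warp_incoming_stk_arg(): stack slot k for a call
// becomes OptoReg::Name(begin_out_arg_area + k), and the running
// out_arg_limit_per_call records how far each call site reaches into the
// outgoing area, i.e. how much stack the call kills.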
|
945 |
||
946 |
||
947 |
//------------------------------match_sfpt------------------------------------- |
|
948 |
// Helper function to match call instructions. Calls match special. |
|
949 |
// They match alone with no children. Their children, the incoming |
|
950 |
// arguments, match normally. |
|
951 |
MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) { |
|
952 |
MachSafePointNode *msfpt = NULL; |
|
953 |
MachCallNode *mcall = NULL; |
|
954 |
uint cnt; |
|
955 |
// Split out case for SafePoint vs Call |
|
956 |
CallNode *call; |
|
957 |
const TypeTuple *domain; |
|
958 |
ciMethod* method = NULL; |
|
959 |
if( sfpt->is_Call() ) { |
|
960 |
call = sfpt->as_Call(); |
|
961 |
domain = call->tf()->domain(); |
|
962 |
cnt = domain->cnt(); |
|
963 |
||
964 |
// Match just the call, nothing else |
|
965 |
MachNode *m = match_tree(call); |
|
966 |
if (C->failing()) return NULL; |
|
967 |
if( m == NULL ) { Matcher::soft_match_failure(); return NULL; } |
|
968 |
||
969 |
// Copy data from the Ideal SafePoint to the machine version |
|
970 |
mcall = m->as_MachCall(); |
|
971 |
||
972 |
mcall->set_tf( call->tf()); |
|
973 |
mcall->set_entry_point(call->entry_point()); |
|
974 |
mcall->set_cnt( call->cnt()); |
|
975 |
||
976 |
if( mcall->is_MachCallJava() ) { |
|
977 |
MachCallJavaNode *mcall_java = mcall->as_MachCallJava(); |
|
978 |
const CallJavaNode *call_java = call->as_CallJava(); |
|
979 |
method = call_java->method(); |
|
980 |
mcall_java->_method = method; |
|
981 |
mcall_java->_bci = call_java->_bci; |
|
982 |
mcall_java->_optimized_virtual = call_java->is_optimized_virtual(); |
|
983 |
if( mcall_java->is_MachCallStaticJava() ) |
|
984 |
mcall_java->as_MachCallStaticJava()->_name = |
|
985 |
call_java->as_CallStaticJava()->_name; |
|
986 |
if( mcall_java->is_MachCallDynamicJava() ) |
|
987 |
mcall_java->as_MachCallDynamicJava()->_vtable_index = |
|
988 |
call_java->as_CallDynamicJava()->_vtable_index; |
|
989 |
} |
|
990 |
else if( mcall->is_MachCallRuntime() ) { |
|
991 |
mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name; |
|
992 |
} |
|
993 |
msfpt = mcall; |
|
994 |
} |
|
995 |
// This is a non-call safepoint |
|
996 |
else { |
|
997 |
call = NULL; |
|
998 |
domain = NULL; |
|
999 |
MachNode *mn = match_tree(sfpt); |
|
1000 |
if (C->failing()) return NULL; |
|
1001 |
msfpt = mn->as_MachSafePoint(); |
|
1002 |
cnt = TypeFunc::Parms; |
|
1003 |
} |
|
1004 |
||
1005 |
// Advertise the correct memory effects (for anti-dependence computation). |
|
1006 |
msfpt->set_adr_type(sfpt->adr_type()); |
|
1007 |
||
1008 |
// Allocate a private array of RegMasks. These RegMasks are not shared. |
|
1009 |
msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt ); |
|
1010 |
// Empty them all. |
|
1011 |
memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt ); |
|
1012 |
||
1013 |
// Do all the pre-defined non-Empty register masks |
|
1014 |
msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask; |
|
1015 |
msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask; |
|
1016 |
||
1017 |
// Place first outgoing argument can possibly be put. |
|
1018 |
OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); |
|
1019 |
assert( is_even(begin_out_arg_area), "" ); |
|
1020 |
// Compute max outgoing register number per call site. |
|
1021 |
OptoReg::Name out_arg_limit_per_call = begin_out_arg_area; |
|
1022 |
// Calls to C may hammer extra stack slots above and beyond any arguments. |
|
1023 |
// These are usually backing store for register arguments for varargs. |
|
1024 |
if( call != NULL && call->is_CallRuntime() ) |
|
1025 |
out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed()); |
|
1026 |
||
1027 |
||
1028 |
// Do the normal argument list (parameters) register masks |
|
1029 |
int argcnt = cnt - TypeFunc::Parms; |
|
1030 |
if( argcnt > 0 ) { // Skip it all if we have no args |
|
1031 |
BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt ); |
|
1032 |
VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt ); |
|
1033 |
int i; |
|
1034 |
for( i = 0; i < argcnt; i++ ) { |
|
1035 |
sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type(); |
|
1036 |
} |
|
1037 |
// V-call to pick proper calling convention |
|
1038 |
call->calling_convention( sig_bt, parm_regs, argcnt ); |
|
1039 |
||
1040 |
#ifdef ASSERT |
|
1041 |
// Sanity check users' calling convention. Really handy during |
|
1042 |
// the initial porting effort. Fairly expensive otherwise. |
|
1043 |
{ for (int i = 0; i<argcnt; i++) { |
|
1044 |
if( !parm_regs[i].first()->is_valid() && |
|
1045 |
!parm_regs[i].second()->is_valid() ) continue; |
|
1046 |
VMReg reg1 = parm_regs[i].first(); |
|
1047 |
VMReg reg2 = parm_regs[i].second(); |
|
1048 |
for (int j = 0; j < i; j++) { |
|
1049 |
if( !parm_regs[j].first()->is_valid() && |
|
1050 |
!parm_regs[j].second()->is_valid() ) continue; |
|
1051 |
VMReg reg3 = parm_regs[j].first(); |
|
1052 |
VMReg reg4 = parm_regs[j].second(); |
|
1053 |
if( !reg1->is_valid() ) { |
|
1054 |
assert( !reg2->is_valid(), "valid halvsies" ); |
|
1055 |
} else if( !reg3->is_valid() ) { |
|
1056 |
assert( !reg4->is_valid(), "valid halvsies" ); |
|
1057 |
} else { |
|
1058 |
assert( reg1 != reg2, "calling conv. must produce distinct regs"); |
|
1059 |
assert( reg1 != reg3, "calling conv. must produce distinct regs"); |
|
1060 |
assert( reg1 != reg4, "calling conv. must produce distinct regs"); |
|
1061 |
assert( reg2 != reg3, "calling conv. must produce distinct regs"); |
|
1062 |
assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs"); |
|
1063 |
assert( reg3 != reg4, "calling conv. must produce distinct regs"); |
|
1064 |
} |
|
1065 |
} |
|
1066 |
} |
|
1067 |
} |
|
1068 |
#endif |
|
1069 |
||
1070 |
// Visit each argument. Compute its outgoing register mask. |
|
1071 |
// Return results now can have 2 bits returned. |
|
1072 |
// Compute max over all outgoing arguments both per call-site |
|
1073 |
// and over the entire method. |
|
1074 |
for( i = 0; i < argcnt; i++ ) { |
|
1075 |
// Address of incoming argument mask to fill in |
|
1076 |
RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms]; |
|
1077 |
if( !parm_regs[i].first()->is_valid() && |
|
1078 |
!parm_regs[i].second()->is_valid() ) { |
|
1079 |
continue; // Avoid Halves |
|
1080 |
} |
|
1081 |
// Grab first register, adjust stack slots and insert in mask. |
|
1082 |
OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call ); |
|
1083 |
if (OptoReg::is_valid(reg1)) |
|
1084 |
rm->Insert( reg1 ); |
|
1085 |
// Grab second register (if any), adjust stack slots and insert in mask. |
|
1086 |
OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call ); |
|
1087 |
if (OptoReg::is_valid(reg2)) |
|
1088 |
rm->Insert( reg2 ); |
|
1089 |
} // End of for all arguments |
|
1090 |
||
1091 |
// Compute number of stack slots needed to restore stack in case of |
|
1092 |
// Pascal-style argument popping. |
|
1093 |
mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area; |
|
1094 |
} |
|
1095 |
||
1096 |
// Compute the max stack slot killed by any call. These will not be |
|
1097 |
// available for debug info, and will be used to adjust FIRST_STACK_mask |
|
1098 |
// after all call sites have been visited. |
|
1099 |
if( _out_arg_limit < out_arg_limit_per_call) |
|
1100 |
_out_arg_limit = out_arg_limit_per_call; |
|
1101 |
||
1102 |
if (mcall) { |
|
1103 |
// Kill the outgoing argument area, including any non-argument holes and |
|
1104 |
// any legacy C-killed slots. Use Fat-Projections to do the killing. |
|
1105 |
// Since the max-per-method covers the max-per-call-site and debug info |
|
1106 |
// is excluded on the max-per-method basis, debug info cannot land in |
|
1107 |
// this killed area. |
|
1108 |
uint r_cnt = mcall->tf()->range()->cnt(); |
|
1109 |
MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); |
|
1110 |
if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) { |
|
1111 |
C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); |
|
1112 |
} else { |
|
1113 |
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) |
|
1114 |
proj->_rout.Insert(OptoReg::Name(i)); |
|
1115 |
} |
|
1116 |
if( proj->_rout.is_NotEmpty() ) |
|
1117 |
_proj_list.push(proj); |
|
1118 |
} |
|
1119 |
// Transfer the safepoint information from the call to the mcall |
|
1120 |
// Move the JVMState list |
|
1121 |
msfpt->set_jvms(sfpt->jvms()); |
|
1122 |
for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) { |
|
1123 |
jvms->set_map(sfpt); |
|
1124 |
} |
|
1125 |
||
1126 |
// Debug inputs begin just after the last incoming parameter |
|
1127 |
assert( (mcall == NULL) || (mcall->jvms() == NULL) || |
|
1128 |
(mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" ); |
|
1129 |
||
1130 |
// Move the OopMap |
|
1131 |
msfpt->_oop_map = sfpt->_oop_map; |
|
1132 |
||
1133 |
// Registers killed by the call are set in the local scheduling pass |
|
1134 |
// of Global Code Motion. |
|
1135 |
return msfpt; |
|
1136 |
} |
|
1137 |
||
1138 |
//---------------------------match_tree---------------------------------------- |
|
1139 |
// Match a Ideal Node DAG - turn it into a tree; Label & Reduce. Used as part |
|
1140 |
// of the whole-sale conversion from Ideal to Mach Nodes. Also used for |
|
1141 |
// making GotoNodes while building the CFG and in init_spill_mask() to identify |
|
1142 |
// a Load's result RegMask for memoization in idealreg2regmask[] |
|
1143 |
MachNode *Matcher::match_tree( const Node *n ) { |
|
1144 |
assert( n->Opcode() != Op_Phi, "cannot match" ); |
|
1145 |
assert( !n->is_block_start(), "cannot match" ); |
|
1146 |
// Set the mark for all locally allocated State objects. |
|
1147 |
// When this call returns, the _states_arena arena will be reset |
|
1148 |
// freeing all State objects. |
|
1149 |
ResourceMark rm( &_states_arena ); |
|
1150 |
||
1151 |
LabelRootDepth = 0; |
|
1152 |
||
1153 |
// StoreNodes require their Memory input to match any LoadNodes |
|
1154 |
Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ; |
|
1155 |
||
1156 |
// State object for root node of match tree |
|
1157 |
// Allocate it on _states_arena - stack allocation can cause stack overflow. |
|
1158 |
State *s = new (&_states_arena) State; |
|
1159 |
s->_kids[0] = NULL; |
|
1160 |
s->_kids[1] = NULL; |
|
1161 |
s->_leaf = (Node*)n; |
|
1162 |
// Label the input tree, allocating labels from top-level arena |
|
1163 |
Label_Root( n, s, n->in(0), mem ); |
|
1164 |
if (C->failing()) return NULL; |
|
1165 |
||
1166 |
// The minimum cost match for the whole tree is found at the root State |
|
1167 |
uint mincost = max_juint; |
|
1168 |
uint cost = max_juint; |
|
1169 |
uint i; |
|
1170 |
for( i = 0; i < NUM_OPERANDS; i++ ) { |
|
1171 |
if( s->valid(i) && // valid entry and |
|
1172 |
s->_cost[i] < cost && // low cost and |
|
1173 |
s->_rule[i] >= NUM_OPERANDS ) // not an operand |
|
1174 |
cost = s->_cost[mincost=i]; |
|
1175 |
} |
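  // Illustrative example (not from the AD files): for a subtree such as
  // (AddI (LoadI mem ptr) con) the DFA may record both a plain register add
  // rule and an add-from-memory rule; the loop above keeps the cheapest
  // labeling that is a real instruction rule rather than a bare operand.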
  if (mincost == max_juint) {
#ifndef PRODUCT
    tty->print("No matching rule for:");
    s->dump();
#endif
    Matcher::soft_match_failure();
    return NULL;
  }
  // Reduce input tree based upon the state labels to machine Nodes
  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
#ifdef ASSERT
  _old2new_map.map(n->_idx, m);
#endif

  // Add any Matcher-ignored edges
  uint cnt = n->req();
  uint start = 1;
  if( mem != (Node*)1 ) start = MemNode::Memory+1;
  if( n->Opcode() == Op_AddP ) {
    assert( mem == (Node*)1, "" );
    start = AddPNode::Base+1;
  }
  for( i = start; i < cnt; i++ ) {
    if( !n->match_edge(i) ) {
      if( i < m->req() )
        m->ins_req( i, n->in(i) );
      else
        m->add_req( n->in(i) );
    }
  }

  return m;
}


//------------------------------match_into_reg---------------------------------
// Choose to either match this Node in a register or part of the current
// match tree.  Return true for requiring a register and false for matching
// as part of the current match tree.
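// For example, an AddI consumed by two different Stores is matched once and
// kept in a register that both match trees reference, while a constant may
// be cloned freely into each user (see the singleton test below).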
static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {

  const Type *t = m->bottom_type();

  if( t->singleton() ) {
    // Never force constants into registers.  Allow them to match as
    // constants or registers.  Copies of the same value will share
    // the same register.  See find_shared_constant.
    return false;
  } else {                  // Not a constant
    // Stop recursion if they have different Controls.
    // Slot 0 of constants is not really a Control.
    if( control && m->in(0) && control != m->in(0) ) {

      // Actually, we can live with the most conservative control we
      // find, if it post-dominates the others.  This allows us to
      // pick up load/op/store trees where the load can float a little
      // above the store.
      Node *x = control;
      const uint max_scan = 6;   // Arbitrary scan cutoff
      uint j;
      for( j=0; j<max_scan; j++ ) {
        if( x->is_Region() )    // Bail out at merge points
          return true;
        x = x->in(0);
        if( x == m->in(0) )     // Does 'control' post-dominate
          break;                // m->in(0)?  If so, we can use it
      }
      if( j == max_scan )       // No post-domination before scan end?
        return true;            // Then break the match tree up
    }

    if (m->Opcode() == Op_DecodeN && m->outcnt() == 2) {
      // These are commonly used in address expressions and can
      // efficiently fold into them in some cases but because they are
      // consumed by AddP they commonly have two users.
      if (m->raw_out(0) == m->raw_out(1) && m->raw_out(0)->Opcode() == Op_AddP) return false;
    }
  }

  // Not forcibly cloning.  If shared, put it into a register.
  return shared;
}


//------------------------------Instruction Selection--------------------------
// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
// ideal nodes to machine instructions.  Trees are delimited by shared Nodes,
// things the Matcher does not match (e.g., Memory), and things with different
// Controls (hence forced into different blocks).  We pass in the Control
// selected for this entire State tree.

// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
// Store and the Load must have identical Memories (as well as identical
// pointers).  Since the Matcher does not have anything for Memory (and
// does not handle DAGs), I have to match the Memory input myself.  If the
// Tree root is a Store, I require all Loads to have the identical memory.
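// Illustrative shape of the add-to-memory DAG described above:
//   (StoreI mem ptr (AddI (LoadI mem ptr) con))
// The Store root and the swallowed Load must share the same 'mem' input,
// which is why 'mem' is threaded through the recursion below.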
Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
  // Since Label_Root is a recursive function, it's possible that we might run
  // out of stack space.  See bugs 6272980 & 6227033 for more info.
  LabelRootDepth++;
  if (LabelRootDepth > MaxLabelRootDepth) {
    C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
    return NULL;
  }
  uint care = 0;          // Edges matcher cares about
  uint cnt = n->req();
  uint i = 0;

  // Examine children for memory state
  // Can only subsume a child into your match-tree if that child's memory state
  // is not modified along the path to another input.
  // It is unsafe even if the other inputs are separate roots.
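  // E.g., two Load children with different memory states may each observe
  // stores the other does not; input_mem becomes NodeSentinel below so such
  // children are forced out into their own match trees.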
  Node *input_mem = NULL;
  for( i = 1; i < cnt; i++ ) {
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);            // Get ith input
    assert( m, "expect non-null children" );
    if( m->is_Load() ) {
      if( input_mem == NULL ) {
        input_mem = m->in(MemNode::Memory);
      } else if( input_mem != m->in(MemNode::Memory) ) {
        input_mem = NodeSentinel;
      }
    }
  }

  for( i = 1; i < cnt; i++ ){// For my children
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);            // Get ith input
    // Allocate states out of a private arena
    State *s = new (&_states_arena) State;
    svec->_kids[care++] = s;
    assert( care <= 2, "binary only for now" );

    // Recursively label the State tree.
    s->_kids[0] = NULL;
    s->_kids[1] = NULL;
    s->_leaf = m;

    // Check for leaves of the State Tree; things that cannot be a part of
    // the current tree.  If it finds any, that value is matched as a
    // register operand.  If not, then the normal matching is used.
    if( match_into_reg(n, m, control, i, is_shared(m)) ||
        //
        // Stop recursion if this is a LoadNode and the root of this tree is a
        // StoreNode and the load & store have different memories.
        ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
        // Can NOT include the match of a subtree when its memory state
        // is used by any of the other subtrees
        (input_mem == NodeSentinel) ) {
#ifndef PRODUCT
      // Print when we exclude matching due to different memory states at input-loads
      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
          && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
        tty->print_cr("invalid input_mem");
      }
#endif
      // Switch to a register-only opcode; this value must be in a register
      // and cannot be subsumed as part of a larger instruction.
      s->DFA( m->ideal_reg(), m );

    } else {
      // If match tree has no control and we do, adopt it for entire tree
      if( control == NULL && m->in(0) != NULL && m->req() > 1 )
        control = m->in(0);         // Pick up control
      // Else match as a normal part of the match tree.
      control = Label_Root(m,s,control,mem);
      if (C->failing()) return NULL;
    }
  }


  // Call DFA to match this node, and return
  svec->DFA( n->Opcode(), n );

#ifdef ASSERT
  uint x;
  for( x = 0; x < _LAST_MACH_OPER; x++ )
    if( svec->valid(x) )
      break;

  if (x >= _LAST_MACH_OPER) {
    n->dump();
    svec->dump();
    assert( false, "bad AD file" );
  }
#endif
  return control;
}


// Con nodes reduced using the same rule can share their MachNode
// which reduces the number of copies of a constant in the final
// program.  The register allocator is free to split uses later to
// split live ranges.
MachNode* Matcher::find_shared_constant(Node* leaf, uint rule) {
  if (!leaf->is_Con()) return NULL;

  // See if this Con has already been reduced using this rule.
  if (_shared_constants.Size() <= leaf->_idx) return NULL;
  MachNode* last = (MachNode*)_shared_constants.at(leaf->_idx);
  if (last != NULL && rule == last->rule()) {
    // Get the new space root.
    Node* xroot = new_node(C->root());
    if (xroot == NULL) {
      // This shouldn't happen given the order of matching.
      return NULL;
    }

    // Shared constants need to have their control be root so they
    // can be scheduled properly.
    Node* control = last->in(0);
    if (control != xroot) {
      if (control == NULL || control == C->root()) {
        last->set_req(0, xroot);
      } else {
        assert(false, "unexpected control");
        return NULL;
      }
    }
    return last;
  }
  return NULL;
}


//------------------------------ReduceInst-------------------------------------
// Reduce a State tree (with given Control) into a tree of MachNodes.
// This routine (and its cohort ReduceOper) convert Ideal Nodes into
// complicated machine Nodes.  Each MachNode covers some tree of Ideal Nodes.
// Each MachNode has a number of complicated MachOper operands; each
// MachOper also covers a further tree of Ideal Nodes.

// The root of the Ideal match tree is always an instruction, so we enter
// the recursion here.  After building the MachNode, we need to recurse
// the tree checking for these cases:
// (1) Child is an instruction -
//       Build the instruction (recursively), add it as an edge.
//       Build a simple operand (register) to hold the result of the instruction.
// (2) Child is an interior part of an instruction -
//       Skip over it (do nothing)
// (3) Child is the start of an operand -
//       Build the operand, place it inside the instruction
//       Call ReduceOper.
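// Illustrative, machine-dependent example of case (3): an indexed-address
// memory operand may cover an (AddP base (LShiftL index con)) subtree, in
// which case ReduceOper walks that interior and adds base and index as
// edges of the enclosing instruction.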
MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  assert( rule >= NUM_OPERANDS, "called with operand rule" );

  MachNode* shared_con = find_shared_constant(s->_leaf, rule);
  if (shared_con != NULL) {
    return shared_con;
  }

  // Build the object to represent this state & prepare for recursive calls
  MachNode *mach = s->MachNodeGenerator( rule, C );
  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
  assert( mach->_opnds[0] != NULL, "Missing result operand" );
  Node *leaf = s->_leaf;
  // Check for instruction or instruction chain rule
  if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
    // Instruction
    mach->add_req( leaf->in(0) ); // Set initial control
    // Reduce interior of complex instruction
    ReduceInst_Interior( s, rule, mem, mach, 1 );
  } else {
    // Instruction chain rules are data-dependent on their inputs
    mach->add_req(0);             // Set initial control to none
    ReduceInst_Chain_Rule( s, rule, mem, mach );
  }

  // If a Memory was used, insert a Memory edge
  if( mem != (Node*)1 )
    mach->ins_req(MemNode::Memory,mem);

  // If the _leaf is an AddP, insert the base edge
  if( leaf->Opcode() == Op_AddP )
    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));

  uint num_proj = _proj_list.size();

  // Perform any 1-to-many expansions required
  MachNode *ex = mach->Expand(s,_proj_list);
  if( ex != mach ) {
    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
    if( ex->in(1)->is_Con() )
      ex->in(1)->set_req(0, C->root());
    // Remove old node from the graph
    for( uint i=0; i<mach->req(); i++ ) {
      mach->set_req(i,NULL);
    }
  }

  // PhaseChaitin::fixup_spills will sometimes generate spill code
  // via the matcher.  By that time, nodes have been wired into the CFG,
  // and any further nodes generated by expand rules will be left hanging
  // in space, and will not get emitted as output code.  Catch this.
  // Also, catch any new register allocation constraints ("projections")
  // generated belatedly during spill code generation.
  if (_allocation_started) {
    guarantee(ex == mach, "no expand rules during spill generation");
    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
  }

  if (leaf->is_Con()) {
    // Record the con for sharing
    _shared_constants.map(leaf->_idx, ex);
  }

  return ex;
}
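
// A chain rule turns something operand-shaped directly into an instruction
// result, e.g. materializing a matched constant operand into a register; it
// is purely data-dependent on that one input, so it starts with no control.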
void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
  // 'op' is what I am expecting to receive
  int op = _leftOp[rule];
  // Operand type to catch the child's result
  // This is what my child will give me.
  int opnd_class_instance = s->_rule[op];
  // Choose between operand class or not.
  // This is what I will receive.
  int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
  // New rule for child.  Chase operand classes to get the actual rule.
  int newrule = s->_rule[catch_op];

  if( newrule < NUM_OPERANDS ) {
    // Chain from operand or operand class, may be output of shared node
    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
            "Bad AD file: Instruction chain rule must chain from operand");
    // Insert operand into array of operands for this instruction
    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );

    ReduceOper( s, newrule, mem, mach );
  } else {
    // Chain from the result of an instruction
    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
    Node *mem1 = (Node*)1;
    mach->add_req( ReduceInst(s, newrule, mem1) );
  }
  return;
}


uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
  if( s->_leaf->is_Load() ) {
    Node *mem2 = s->_leaf->in(MemNode::Memory);
    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
    mem = mem2;
  }
  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
    if( mach->in(0) == NULL )
      mach->set_req(0, s->_leaf->in(0));
  }

  // Now recursively walk the state tree & add operand list.
  for( uint i=0; i<2; i++ ) {   // binary tree
    State *newstate = s->_kids[i];
    if( newstate == NULL ) break;      // Might only have 1 child
    // 'op' is what I am expecting to receive
    int op;
    if( i == 0 ) {
      op = _leftOp[rule];
    } else {
      op = _rightOp[rule];
    }
    // Operand type to catch the child's result
    // This is what my child will give me.
    int opnd_class_instance = newstate->_rule[op];
    // Choose between operand class or not.
    // This is what I will receive.
    int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
    // New rule for child.  Chase operand classes to get the actual rule.
    int newrule = newstate->_rule[catch_op];

    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
      // Operand/operandClass
      // Insert operand into array of operands for this instruction
      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
      ReduceOper( newstate, newrule, mem, mach );

    } else {                    // Child is internal operand or new instruction
      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
        // internal operand --> call ReduceInst_Interior
        // Interior of complex instruction.  Do nothing but recurse.
        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
      } else {
        // instruction --> call build operand(  ) to catch result
        //             --> ReduceInst( newrule )
        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
        Node *mem1 = (Node*)1;
        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
      }
    }
    assert( mach->_opnds[num_opnds-1], "" );
  }
  return num_opnds;
}

// This routine walks the interior of possible complex operands.
// At each point we check our children in the match tree:
// (1) No children -
//       We are a leaf; add _leaf field as an input to the MachNode
// (2) Child is an internal operand -
//       Skip over it ( do nothing )
// (3) Child is an instruction -
//       Call ReduceInst recursively and
//       add the instruction as an input to the MachNode
void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
  assert( rule < _LAST_MACH_OPER, "called with operand rule" );
  State *kid = s->_kids[0];
  assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );

  // Leaf?  And not subsumed?
  if( kid == NULL && !_swallowed[rule] ) {
    mach->add_req( s->_leaf );  // Add leaf pointer
    return;                     // Bail out
  }

  if( s->_leaf->is_Load() ) {
    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
    mem = s->_leaf->in(MemNode::Memory);
  }
  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
    if( !mach->in(0) )
      mach->set_req(0,s->_leaf->in(0));
    else {
      assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
    }
  }

  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) {   // binary tree
    int newrule;
    if( i == 0 )
      newrule = kid->_rule[_leftOp[rule]];
    else
      newrule = kid->_rule[_rightOp[rule]];

    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
      // Internal operand; recurse but do nothing else
      ReduceOper( kid, newrule, mem, mach );

    } else {                    // Child is a new instruction
      // Reduce the instruction, and add a direct pointer from this
      // machine instruction to the newly reduced one.
      Node *mem1 = (Node*)1;
      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
    }
  }
}


// -------------------------------------------------------------------------
// Java-Java calling convention
// (what you use when Java calls Java)

//------------------------------find_receiver----------------------------------
// For a given signature, return the OptoReg for parameter 0.
OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
  VMRegPair regs;
  BasicType sig_bt = T_OBJECT;
  calling_convention(&sig_bt, &regs, 1, is_outgoing);
  // Return argument 0 register.  In the LP64 build pointers
  // take 2 registers, but the VM wants only the 'main' name.
  return OptoReg::as_OptoReg(regs.first());
}

// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register
// defined in ad_<arch>.cpp


//------------------------------find_shared------------------------------------
// Set bits if Node is shared or otherwise a root
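// Roughly: a node with multiple users must be matched as the root of its own
// tree and live in a register, while the special cases below (cloned Bools,
// flag producers, addressing expressions) are cheap enough to duplicate at
// each use instead.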
void Matcher::find_shared( Node *n ) {
  // Allocate stack of size C->unique() * 2 to avoid frequent realloc
  MStack mstack(C->unique() * 2);
  mstack.push(n, Visit);     // Don't need to pre-visit root node
  while (mstack.is_nonempty()) {
    n = mstack.node();       // Leave node on stack
    Node_State nstate = mstack.state();
    if (nstate == Pre_Visit) {
      if (is_visited(n)) {   // Visited already?
        // Node is shared and has no reason to clone.  Flag it as shared.
        // This causes it to match into a register for the sharing.
        set_shared(n);       // Flag as shared and
        mstack.pop();        // remove node from stack
        continue;
      }
      nstate = Visit; // Not already visited; so visit now
    }
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      set_visited(n);   // Flag as visited now
      bool mem_op = false;

      switch( n->Opcode() ) {  // Handle some opcodes special
      case Op_Phi:             // Treat Phis as shared roots
      case Op_Parm:
      case Op_Proj:            // All handled specially during matching
      case Op_SafePointScalarObject:
        set_shared(n);
        set_dontcare(n);
        break;
      case Op_If:
      case Op_CountedLoopEnd:
        mstack.set_state(Alt_Post_Visit); // Alternative way
        // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)).  Helps
        // with matching cmp/branch in 1 instruction.  The Matcher needs the
        // Bool and CmpX side-by-side, because it can only get at constants
        // that are at the leaves of Match trees, and the Bool's condition acts
        // as a constant here.
        mstack.push(n->in(1), Visit);         // Clone the Bool
        mstack.push(n->in(0), Pre_Visit);     // Visit control input
        continue; // while (mstack.is_nonempty())
      case Op_ConvI2D:         // These forms efficiently match with a prior
      case Op_ConvI2F:         //   Load but not a following Store
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 &&           // Not already shared
            n->unique_out()->is_Store() ) // Following store
          set_shared(n);       // Force it to be a root
        break;
      case Op_ReverseBytesI:
      case Op_ReverseBytesL:
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 )            // Not already shared
          set_shared(n);                  // Force it to be a root
        break;
      case Op_BoxLock:         // Can't match until we get stack-regs in ADLC
      case Op_IfFalse:
      case Op_IfTrue:
      case Op_MachProj:
      case Op_MergeMem:
      case Op_Catch:
      case Op_CatchProj:
      case Op_CProj:
      case Op_JumpProj:
      case Op_JProj:
      case Op_NeverBranch:
        set_dontcare(n);
        break;
      case Op_Jump:
        mstack.push(n->in(1), Visit);         // Switch Value
        mstack.push(n->in(0), Pre_Visit);     // Visit Control input
        continue; // while (mstack.is_nonempty())
      case Op_StrComp:
        set_shared(n); // Force result into register (it will be anyway)
        break;
      case Op_ConP: {  // Convert pointers above the centerline to NULL
        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
        const TypePtr* tp = tn->type()->is_ptr();
        if (tp->_ptr == TypePtr::AnyNull) {
          tn->set_type(TypePtr::NULL_PTR);
        }
        break;
      }
      case Op_Binary:         // These are introduced in the Post_Visit state.
        ShouldNotReachHere();
        break;
      case Op_StoreB:         // Do match these, despite no ideal reg
      case Op_StoreC:
      case Op_StoreCM:
      case Op_StoreD:
      case Op_StoreF:
      case Op_StoreI:
      case Op_StoreL:
      case Op_StoreP:
      case Op_StoreN:
      case Op_Store16B:
      case Op_Store8B:
      case Op_Store4B:
      case Op_Store8C:
      case Op_Store4C:
      case Op_Store2C:
      case Op_Store4I:
      case Op_Store2I:
      case Op_Store2L:
      case Op_Store4F:
      case Op_Store2F:
      case Op_Store2D:
      case Op_ClearArray:
      case Op_SafePoint:
        mem_op = true;
        break;
      case Op_LoadB:
      case Op_LoadC:
      case Op_LoadD:
      case Op_LoadF:
      case Op_LoadI:
      case Op_LoadKlass:
      case Op_LoadL:
      case Op_LoadS:
      case Op_LoadP:
      case Op_LoadN:
      case Op_LoadRange:
      case Op_LoadD_unaligned:
      case Op_LoadL_unaligned:
      case Op_Load16B:
      case Op_Load8B:
      case Op_Load4B:
      case Op_Load4C:
      case Op_Load2C:
      case Op_Load8C:
      case Op_Load8S:
      case Op_Load4S:
      case Op_Load2S:
      case Op_Load4I:
      case Op_Load2I:
      case Op_Load2L:
      case Op_Load4F:
      case Op_Load2F:
      case Op_Load2D:
        mem_op = true;
        // Must be root of match tree due to prior load conflict
        if( C->subsume_loads() == false ) {
          set_shared(n);
        }
        // Fall into default case
      default:
        if( !n->ideal_reg() )
          set_dontcare(n);  // Unmatchable Nodes
      } // end_switch

      for(int i = n->req() - 1; i >= 0; --i) { // For my children
        Node *m = n->in(i); // Get ith input
        if (m == NULL) continue;  // Ignore NULLs
        uint mop = m->Opcode();

        // Must clone all producers of flags, or we will not match correctly.
        // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
        // then it will match into an ideal Op_RegFlags.  Alas, the fp-flags
        // are also there, so we may match a float-branch to int-flags and
        // expect the allocator to haul the flags from the int-side to the
        // fp-side.  No can do.
        if( _must_clone[mop] ) {
          mstack.push(m, Visit);
          continue; // for(int i = ...)
        }

        // Clone addressing expressions as they are "free" in most instructions
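        // E.g., on x86 an (AddP (AddP base (LShiftX idx con)) off) chain can
        // fold into one base+index*scale+offset addressing mode, so the
        // pieces are cloned into each memory user rather than shared.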
        if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
          Node *off = m->in(AddPNode::Offset);
          if( off->is_Con() ) {
            set_visited(m);  // Flag as visited now
            Node *adr = m->in(AddPNode::Address);

            // Intel, ARM and friends can handle 2 adds in addressing mode
            if( clone_shift_expressions && adr->Opcode() == Op_AddP &&
                // AtomicAdd is not an addressing expression.
                // Cheap to find it by looking for screwy base.
                !adr->in(AddPNode::Base)->is_top() ) {
              set_visited(adr);  // Flag as visited now
              Node *shift = adr->in(AddPNode::Offset);
              // Check for shift by small constant as well
              if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
                  shift->in(2)->get_int() <= 3 ) {
                set_visited(shift);  // Flag as visited now
                mstack.push(shift->in(2), Visit);
#ifdef _LP64
                // Allow Matcher to match the rule which bypass
                // ConvI2L operation for an array index on LP64
                // if the index value is positive.
                if( shift->in(1)->Opcode() == Op_ConvI2L &&
                    shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
                  set_visited(shift->in(1));  // Flag as visited now
                  mstack.push(shift->in(1)->in(1), Pre_Visit);
                } else
#endif
                mstack.push(shift->in(1), Pre_Visit);
              } else {
                mstack.push(shift, Pre_Visit);
              }
              mstack.push(adr->in(AddPNode::Address), Pre_Visit);
              mstack.push(adr->in(AddPNode::Base), Pre_Visit);
            } else {  // Sparc, Alpha, PPC and friends
              mstack.push(adr, Pre_Visit);
            }

            // Clone X+offset as it also folds into most addressing expressions
            mstack.push(off, Visit);
            mstack.push(m->in(AddPNode::Base), Pre_Visit);
            continue; // for(int i = ...)
          } // if( off->is_Con() )
        }   // if( mem_op &&
        mstack.push(m, Pre_Visit);
      }     // for(int i = ...)
    }
    else if (nstate == Alt_Post_Visit) {
      mstack.pop(); // Remove node from stack
      // We cannot remove the Cmp input from the Bool here, as the Bool may be
      // shared and all users of the Bool need to move the Cmp in parallel.
      // This leaves both the Bool and the If pointing at the Cmp.  To
      // prevent the Matcher from trying to Match the Cmp along both paths
      // BoolNode::match_edge always returns a zero.

      // We reorder the Op_If in a pre-order manner, so we can visit without
      // accidentally sharing the Cmp (the Bool and the If make 2 users).
      n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
    }
    else if (nstate == Post_Visit) {
      mstack.pop(); // Remove node from stack

      // Now hack a few special opcodes
      switch( n->Opcode() ) {       // Handle some opcodes special
      case Op_StorePConditional:
      case Op_StoreLConditional:
      case Op_CompareAndSwapI:
      case Op_CompareAndSwapL:
      case Op_CompareAndSwapP:
      case Op_CompareAndSwapN: {   // Convert trinary to binary-tree
        Node *newval = n->in(MemNode::ValueIn );
        Node *oldval = n->in(LoadStoreNode::ExpectedIn);
        Node *pair = new (C, 3) BinaryNode( oldval, newval );
        n->set_req(MemNode::ValueIn,pair);
        n->del_req(LoadStoreNode::ExpectedIn);
        break;
      }
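      // The node now matches as a binary tree, roughly
      //   (CompareAndSwapX ... ptr (Binary oldval newval)),
      // preserving the matcher's binary-tree assumption.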
      case Op_CMoveD:              // Convert trinary to binary-tree
      case Op_CMoveF:
      case Op_CMoveI:
      case Op_CMoveL:
      case Op_CMoveP: {
        // Restructure into a binary tree for Matching.  It's possible that
        // we could move this code up next to the graph reshaping for IfNodes
        // or vice-versa, but I do not want to debug this for Ladybird.
        // 10/2/2000 CNC.
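        // Resulting shape, roughly:
        //   (CMoveX ctrl (Binary bol cmp) (Binary src_false src_true))
        // pair1 bundles the Bool with its Cmp; pair2 bundles the two data
        // inputs, turning the trinary CMove into a binary match tree.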
        Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1));
        n->set_req(1,pair1);
        Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair2);
        n->del_req(3);
        break;
      }
      default:
        break;
      }
    }
    else {
      ShouldNotReachHere();
    }
  } // end of while (mstack.is_nonempty())
}

#ifdef ASSERT
// machine-independent root to machine-dependent root
void Matcher::dump_old2new_map() {
  _old2new_map.dump();
}
#endif

//---------------------------collect_null_checks-------------------------------
// Find null checks in the ideal graph; write a machine-specific node for
// it.  Used by later implicit-null-check handling.  Actually collects
// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
// value being tested.
void Matcher::collect_null_checks( Node *proj ) {
  Node *iff = proj->in(0);
  if( iff->Opcode() == Op_If ) {
    // During matching If's have Bool & Cmp side-by-side
    BoolNode *b = iff->in(1)->as_Bool();
    Node *cmp = iff->in(2);
    int opc = cmp->Opcode();
    if (opc != Op_CmpP && opc != Op_CmpN) return;

    const Type* ct = cmp->in(2)->bottom_type();
    if (ct == TypePtr::NULL_PTR ||
        (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {

      if( proj->Opcode() == Op_IfTrue ) {
        extern int all_null_checks_found;
        all_null_checks_found++;
        if( b->_test._test == BoolTest::ne ) {
          _null_check_tests.push(proj);
          _null_check_tests.push(cmp->in(1));
        }
      } else {
        assert( proj->Opcode() == Op_IfFalse, "" );
        if( b->_test._test == BoolTest::eq ) {
          _null_check_tests.push(proj);
          _null_check_tests.push(cmp->in(1));
        }
      }
    }
  }
}

//---------------------------validate_null_checks------------------------------
// It's possible that the value being NULL checked is not the root of a match
// tree.  If so, I cannot use the value in an implicit null check.
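// E.g., a pointer value swallowed into a larger match tree (an address folded
// into a load's operand) has no machine node of its own, so the branch cannot
// be replaced by an implicit trap on it; such entries are yanked below.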
void Matcher::validate_null_checks( ) {
  uint cnt = _null_check_tests.size();
  for( uint i=0; i < cnt; i+=2 ) {
    Node *test = _null_check_tests[i];
    Node *val = _null_check_tests[i+1];
    if (has_new_node(val)) {
      // Is a match-tree root, so replace with the matched value
      _null_check_tests.map(i+1, new_node(val));
    } else {
      // Yank from candidate list
      _null_check_tests.map(i+1,_null_check_tests[--cnt]);
      _null_check_tests.map(i,_null_check_tests[--cnt]);
      _null_check_tests.pop();
      _null_check_tests.pop();
      i-=2;
    }
  }
}


// Used by the DFA in dfa_sparc.cpp.  Check for a prior FastLock
// acting as an Acquire and thus we don't need an Acquire here.  We
// retain the Node to act as a compiler ordering barrier.
bool Matcher::prior_fast_lock( const Node *acq ) {
  Node *r = acq->in(0);
  if( !r->is_Region() || r->req() <= 1 ) return false;
  Node *proj = r->in(1);
  if( !proj->is_Proj() ) return false;
  Node *call = proj->in(0);
  if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() )
    return false;

  return true;
}

// Used by the DFA in dfa_sparc.cpp.  Check for a following FastUnLock
// acting as a Release and thus we don't need a Release here.  We
// retain the Node to act as a compiler ordering barrier.
bool Matcher::post_fast_unlock( const Node *rel ) {
  Compile *C = Compile::current();
  assert( rel->Opcode() == Op_MemBarRelease, "" );
  const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  Node *ctrl = NULL;
  while( true ) {
    ctrl = mem->fast_out(i);      // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }
  Node *iff = NULL;
  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      iff = x;
      break;
    }
  }
  if( !iff ) return false;
  Node *bol = iff->in(1);
  // The iff might be some random subclass of If or bol might be Con-Top
  if (!bol->is_Bool())  return false;
  assert( bol->req() > 1, "" );
  return (bol->in(1)->Opcode() == Op_FastUnlock);
}

// Used by the DFA in dfa_xxx.cpp.  Check for a following barrier or
// atomic instruction acting as a store_load barrier without any
// intervening volatile load, and thus we don't need a barrier here.
// We retain the Node to act as a compiler ordering barrier.
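// E.g., a volatile store followed by a CompareAndSwap needs no separate
// StoreLoad barrier, since the atomic already serializes; but an upcoming
// volatile load (seen as a MemBarAcquire below) forces the barrier to stay.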
bool Matcher::post_store_load_barrier(const Node *vmb) {
  Compile *C = Compile::current();
  assert( vmb->is_MemBar(), "" );
  assert( vmb->Opcode() != Op_MemBarAcquire, "" );
  const MemBarNode *mem = (const MemBarNode*)vmb;

  // Get the Proj node, ctrl, that can be used to iterate forward
  Node *ctrl = NULL;
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  while( true ) {
    ctrl = mem->fast_out(i);      // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }

  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    int xop = x->Opcode();

    // We don't need current barrier if we see another or a lock
    // before seeing volatile load.
    //
    // Op_FastUnlock previously appeared in the Op_* list below.
    // With the advent of 1-0 lock operations we're no longer guaranteed
    // that a monitor exit operation contains a serializing instruction.

    if (xop == Op_MemBarVolatile ||
        xop == Op_FastLock ||
        xop == Op_CompareAndSwapL ||
        xop == Op_CompareAndSwapP ||
        xop == Op_CompareAndSwapN ||
        xop == Op_CompareAndSwapI)
      return true;

    if (x->is_MemBar()) {
      // We must retain this membar if there is an upcoming volatile
      // load, which will be preceded by acquire membar.
      if (xop == Op_MemBarAcquire)
        return false;
      // For other kinds of barriers, check by pretending we
      // are them, and seeing if we can be removed.
      else
        return post_store_load_barrier((const MemBarNode*)x);
    }

    // Delicate code to detect case of an upcoming fastlock block
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      Node *iff = x;
      Node *bol = iff->in(1);
      // The iff might be some random subclass of If or bol might be Con-Top
      if (!bol->is_Bool())  return false;
      assert( bol->req() > 1, "" );
      return (bol->in(1)->Opcode() == Op_FastUnlock);
    }
    // probably not necessary to check for these
    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
      return false;
  }
  return false;
}

//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) {
#ifdef ASSERT
  _id = 0;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  //memset(_cost, -1, sizeof(_cost));
  //memset(_rule, -1, sizeof(_rule));
#endif
  memset(_valid, 0, sizeof(_valid));
}

#ifdef ASSERT
State::~State() {
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
#endif

#ifndef PRODUCT
//---------------------------dump----------------------------------------------
void State::dump() {
  tty->print("\n");
  dump(0);
}

void State::dump(int depth) {
  for( int j = 0; j < depth; j++ )
    tty->print(" ");
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  for( i = 0; i < _LAST_MACH_OPER; i++ )
    // Check for valid entry
    if( valid(i) ) {
      for( int j = 0; j < depth; j++ )
        tty->print(" ");
      assert(_cost[i] != max_juint, "cost must be a valid value");
      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s %d %s",
                    ruleName[i], _cost[i], ruleName[_rule[i]] );
    }
  tty->print_cr("");

  for( i=0; i<2; i++ )
    if( _kids[i] )
      _kids[i]->dump(depth+1);
}
#endif