//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "gvn"

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

// Maximum allowed recursion depth.
static cl::opt<uint32_t>
MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
                cl::desc("Max recurse depth (default = 1000)"));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    uint32_t opcode;
    Type *type;
    SmallVector<uint32_t, 4> varargs;

    Expression(uint32_t o = ~2U) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
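      // Opcodes ~0U and ~1U are reserved as the DenseMap empty and tombstone
      // sentinel keys (see DenseMapInfo<Expression> below).  Two sentinels
      // with the same opcode compare equal on opcode alone, since their type
      // and varargs fields are meaningless.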
      if (opcode == ~0U || opcode == ~1U)
        return true;
      if (type != other.type)
        return false;
      if (varargs != other.varargs)
        return false;
      return true;
    }

    friend hash_code hash_value(const Expression &Value) {
      return hash_combine(Value.opcode, Value.type,
                          hash_combine_range(Value.varargs.begin(),
                                             Value.varargs.end()));
    }
  };

  class ValueTable {
    DenseMap<Value*, uint32_t> valueNumbering;
    DenseMap<Expression, uint32_t> expressionNumbering;
    AliasAnalysis *AA;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    uint32_t nextValueNumber;

    Expression create_expression(Instruction* I);
    Expression create_cmp_expression(unsigned Opcode,
                                     CmpInst::Predicate Predicate,
                                     Value *LHS, Value *RHS);
    Expression create_extractvalue_expression(ExtractValueInst* EI);
    uint32_t lookup_or_add_call(CallInst* C);
  public:
    ValueTable() : nextValueNumber(1) { }
    uint32_t lookup_or_add(Value *V);
    uint32_t lookup(Value *V) const;
    uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
                               Value *LHS, Value *RHS);
    void add(Value *V, uint32_t num);
    void clear();
    void erase(Value *v);
    void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
    AliasAnalysis *getAliasAnalysis() const { return AA; }
    void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
    void setDomTree(DominatorTree* D) { DT = D; }
    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
    void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    using llvm::hash_value;
    return static_cast<unsigned>(hash_value(e));
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));
  if (I->isCommutative()) {
    // Ensure that commutative instructions that only differ by a permutation
    // of their operands get the same value number by sorting the operand value
    // numbers.  Since all commutative instructions have two operands it is more
    // efficient to sort by hand rather than using, say, std::sort.
    assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
    if (e.varargs[0] > e.varargs[1])
      std::swap(e.varargs[0], e.varargs[1]);
  }
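  // At this point, e.g. "add %a, %b" and "add %b, %a" have produced identical
  // Expressions, so both receive the same value number.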

  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    // Sort the operand value numbers so x<y and y>x get the same value number.
    CmpInst::Predicate Predicate = C->getPredicate();
    if (e.varargs[0] > e.varargs[1]) {
      std::swap(e.varargs[0], e.varargs[1]);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }
    e.opcode = (C->getOpcode() << 8) | Predicate;
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}

Expression ValueTable::create_cmp_expression(unsigned Opcode,
                                             CmpInst::Predicate Predicate,
                                             Value *LHS, Value *RHS) {
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Not a comparison!");
  Expression e;
  e.type = CmpInst::makeCmpResultType(LHS->getType());
  e.varargs.push_back(lookup_or_add(LHS));
  e.varargs.push_back(lookup_or_add(RHS));

  // Sort the operand value numbers so x<y and y>x get the same value number.
  if (e.varargs[0] > e.varargs[1]) {
    std::swap(e.varargs[0], e.varargs[1]);
    Predicate = CmpInst::getSwappedPredicate(Predicate);
  }
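  // Pack the (possibly swapped) predicate into the low 8 bits of the opcode,
  // mirroring the CmpInst encoding in create_expression above; e.g.
  // "icmp sgt %a, %b" and "icmp slt %b, %a" yield the same expression.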
  e.opcode = (Opcode << 8) | Predicate;
  return e;
}

Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
  assert(EI && "Not an ExtractValueInst?");
  Expression e;
  e.type = EI->getType();
  e.opcode = 0;

  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
  if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
    // EI might be an extract from one of our recognised intrinsics.  If it
    // is we'll synthesize a semantically equivalent expression instead of
    // an extractvalue expression.
    switch (I->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        e.opcode = Instruction::Add;
        break;
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        e.opcode = Instruction::Sub;
        break;
      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        e.opcode = Instruction::Mul;
        break;
      default:
        break;
    }

    if (e.opcode != 0) {
      // Intrinsic recognized.  Grab its args to finish building the expression.
      assert(I->getNumArgOperands() == 2 &&
             "Expect two args for recognised intrinsics.");
      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
      return e;
    }
  }

  // Not a recognised intrinsic.  Fall back to producing an extract value
  // expression.
  e.opcode = EI->getOpcode();
  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

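// Calls need special care because their results can depend on memory.  A call
// that provably doesn't access memory is numbered like any other expression.
// A call that only reads memory may reuse the number of an identical call it
// depends on, when MemoryDependenceAnalysis reports a single defining call
// (local, or non-local but dominating) with matching arguments.  All other
// calls get a fresh value number.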
uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: Move the checking logic to MemDep!
    CallInst* cdep = nullptr;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-definitions.  If we already have a call, reject
      // instruction dependencies.
      if (!I->getResult().isDef() || cdep != nullptr) {
        cdep = nullptr;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())) {
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = nullptr;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    case Instruction::ExtractValue:
      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
      break;
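    // Everything not handled above (loads, stores, PHIs, allocas, terminators,
    // and so on) either touches memory or depends on control flow, so it is
    // not value-numbered by its operands and instead gets a unique number.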
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// lookup_or_add_cmp - Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before.  Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
                                       CmpInst::Predicate Predicate,
                                       Value *LHS, Value *RHS) {
  Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  return e;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  class GVN;
  struct AvailableValueInBlock {
    /// BB - The basic block in question.
    BasicBlock *BB;
    enum ValType {
      SimpleVal,  // A simple offsetted value that is accessed.
      LoadVal,    // A value produced by a load.
      MemIntrin,  // A memory intrinsic which is loaded from.
      UndefVal    // An UndefValue representing a value from a dead block
                  // (which is not yet physically removed from the CFG).
    };

    /// V - The value that is live out of the block.
    PointerIntPair<Value *, 2, ValType> Val;

    /// Offset - The byte offset in Val that is interesting for the load query.
    unsigned Offset;

    static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                     unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(V);
      Res.Val.setInt(SimpleVal);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                       unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(MI);
      Res.Val.setInt(MemIntrin);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
                                         unsigned Offset = 0) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(LI);
      Res.Val.setInt(LoadVal);
      Res.Offset = Offset;
      return Res;
    }

    static AvailableValueInBlock getUndef(BasicBlock *BB) {
      AvailableValueInBlock Res;
      Res.BB = BB;
      Res.Val.setPointer(nullptr);
      Res.Val.setInt(UndefVal);
      Res.Offset = 0;
      return Res;
    }

    bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
    bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
    bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
    bool isUndefValue() const { return Val.getInt() == UndefVal; }

    Value *getSimpleValue() const {
      assert(isSimpleValue() && "Wrong accessor");
      return Val.getPointer();
    }

    LoadInst *getCoercedLoadValue() const {
      assert(isCoercedLoadValue() && "Wrong accessor");
      return cast<LoadInst>(Val.getPointer());
    }

    MemIntrinsic *getMemIntrinValue() const {
      assert(isMemIntrinValue() && "Wrong accessor");
      return cast<MemIntrinsic>(Val.getPointer());
    }

    /// MaterializeAdjustedValue - Emit code into this block to adjust the
    /// value defined here to the specified type.  This handles various
    /// coercion cases.
    Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
  };

  class GVN : public FunctionPass {
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const DataLayout *DL;
    const TargetLibraryInfo *TLI;
    AssumptionCache *AC;
    SetVector<BasicBlock *> DeadBlocks;

    ValueTable VN;

    /// LeaderTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number.  Use findLeader to query it.
    struct LeaderTableEntry {
      Value *Val;
      const BasicBlock *BB;
      LeaderTableEntry *Next;
    };
    DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
    BumpPtrAllocator TableAllocator;

    SmallVector<Instruction*, 8> InstrsToErase;

    typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
    typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
    typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    /// markInstructionForDeletion - This removes the specified instruction
    /// from our various maps and marks it for deletion.
    void markInstructionForDeletion(Instruction *I) {
      VN.erase(I);
      InstrsToErase.push_back(I);
    }

    const DataLayout *getDataLayout() const { return DL; }
    DominatorTree &getDominatorTree() const { return *DT; }
    AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
    MemoryDependenceAnalysis &getMemDep() const { return *MD; }
  private:
    /// addToLeaderTable - Push a new Value to the LeaderTable onto the list
    /// for its value number.
    void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
      LeaderTableEntry &Curr = LeaderTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// removeFromLeaderTable - Scan the list of values corresponding to a
    /// given value number, and remove the given instruction if encountered.
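    /// Note that the (I, BB) pair is expected to be present: the scan below
    /// does not check for the end of the list.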
    void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
      LeaderTableEntry* Prev = nullptr;
      LeaderTableEntry* Curr = &LeaderTable[N];

      while (Curr->Val != I || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = nullptr;
          Curr->BB = nullptr;
        } else {
          LeaderTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
          Curr->Next = Next->Next;
        }
      }
    }

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator and postdominator info.
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<TargetLibraryInfo>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addPreserved<AliasAnalysis>();
    }


    // Helper functions for redundant load elimination
    bool processLoad(LoadInst *L);
    bool processNonLocalLoad(LoadInst *L);
    void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
                                 AvailValInBlkVect &ValuesPerBlock,
                                 UnavailBlkVect &UnavailableBlocks);
    bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
                        UnavailBlkVect &UnavailableBlocks);

    // Other helper routines
    bool processInstruction(Instruction *I);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*> &d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function &F);
    bool performScalarPRE(Instruction *I);
    Value *findLeader(const BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
    BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
    unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
                                         const BasicBlockEdge &Root);
    bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
    bool processFoldableCondBr(BranchInst *BI);
    void addDeadBlock(BasicBlock *BB);
    void assignValNumForDeadCode();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
      errs() << I->first << "\n";
      I->second->dump();
  }
  errs() << "}\n";
}
#endif

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks.
/// This map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
                            uint32_t RecurseDepth) {
  if (RecurseDepth > MaxRecurseDepth)
    return false;

  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this
// and used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them
  // as 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    BBWorklist.append(succ_begin(Entry), succ_end(Entry));
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            Type *LoadTy,
                                            const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (DL.getTypeSizeInBits(StoredVal->getType()) <
        DL.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}

/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory,
/// and then a load from a must-aliased pointer of a different type, try to
/// coerce the stored value.  LoadedTy is the type of the load we want to
/// replace and InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const DataLayout &DL) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
    return nullptr;

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->getScalarType()->isPointerTy() &&
        LoadedTy->getScalarType()->isPointerTy())
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->getScalarType()->isPointerTy()) {
      StoredValTy = DL.getIntPtrType(StoredValTy);
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->getScalarType()->isPointerTy())
      TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->getScalarType()->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->getScalarType()->isPointerTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the
  // low bits so that a truncate will work.
  if (DL.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->getScalarType()->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits
/// used by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
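/// For example, an i32 load from (P+4) that is clobbered by an i64 store to P
/// lies entirely within the stored bytes, so this would return offset 4.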
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load and then merge the bits in) but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
    DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, DL);
}

/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load.  See
/// if the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
                                         LoadInst *DepLI, const DataLayout &DL){
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
  if (R != -1) return R;

  // If we have a load/load clobber and DepLI can be widened to cover this
  // load, then we should widen it!
  int64_t LoadOffs = 0;
  const Value *LoadBase =
    GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
  if (Size == 0) return -1;

  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
}


static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
  if (!GV || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  unsigned AS = Src->getType()->getPointerAddressSpace();
  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  if (ConstantFoldLoadFromConstPtr(Src, &DL))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store provides bits used by the load but the pointers don't
/// mustalias.  Check this case to see if there is anything more we can do
/// before we give up.
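/// For example, materializing an i16 load at byte offset 2 of a stored i64
/// shifts the stored value right by 16 bits (on a little-endian target) and
/// truncates it to i16.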
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   Type *LoadTy,
                                   Instruction *InsertPt, const DataLayout &DL){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.
  // Convert to an integer type to start with.
  if (SrcVal->getType()->getScalarType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal,
                                    DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}

/// GetLoadValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering load.  This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
                                  Type *LoadTy, Instruction *InsertPt,
                                  GVN &gvn) {
  const DataLayout &DL = *gvn.getDataLayout();
  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
  // widen SrcVal out to a larger load.
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
  if (Offset+LoadSize > SrcValSize) {
    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
    // If we have a load/load clobber and DepLI can be widened to cover this
    // load, then we should widen it to the next power of 2 size big enough!
    unsigned NewLoadSize = Offset+LoadSize;
    if (!isPowerOf2_32(NewLoadSize))
      NewLoadSize = NextPowerOf2(NewLoadSize);
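    // e.g. a 2-byte load at Offset 3 into a 4-byte SrcVal needs 5 bytes, so
    // the widened load is rounded up to 8 bytes.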

    Value *PtrVal = SrcVal->getPointerOperand();

    // Insert the new load after the old load.  This ensures that subsequent
    // memdep queries will find the new load.  We can't easily remove the old
    // load completely because it is already in the value numbering table.
    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
    Type *DestPTy =
      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
    DestPTy = PointerType::get(DestPTy,
                               PtrVal->getType()->getPointerAddressSpace());
    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
    NewLoad->takeName(SrcVal);
    NewLoad->setAlignment(SrcVal->getAlignment());

    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");

    // Replace uses of the original load with the wider load.  On a big endian
    // system, we need to shift down to get the relevant bits.
    Value *RV = NewLoad;
    if (DL.isBigEndian())
      RV = Builder.CreateLShr(RV,
                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
    RV = Builder.CreateTrunc(RV, SrcVal->getType());
    SrcVal->replaceAllUsesWith(RV);

    // We would like to use gvn.markInstructionForDeletion here, but we can't
    // because the load is already memoized into the leader map table that GVN
    // tracks.  It is potentially possible to remove the load from the table,
    // but then all of the operations based on it would need to be rehashed.
    // Just leave the dead load around.
    gvn.getMemDep().removeInstruction(SrcVal);
    SrcVal = NewLoad;
  }

  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     Type *LoadTy, Instruction *InsertPt,
                                     const DataLayout &DL){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
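    // e.g. for a 4-byte load of memset(P, 0xAB, n) this builds 0xABABABAB:
    // each shift-and-or doubles the initialized bytes while that still fits,
    // then any remainder is appended one byte at a time.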
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned AS = Src->getType()->getPointerAddressSpace();

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 Type::getInt8PtrTy(Src->getContext(), AS));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
  return ConstantFoldLoadFromConstPtr(Src, &DL);
}

223e47cc LB |
1271 | |
1272 | /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, | |
1273 | /// construct SSA form, allowing us to eliminate LI. This returns the value | |
1274 | /// that should be used at LI's definition site. | |
1275 | static Value *ConstructSSAForLoadSet(LoadInst *LI, | |
1276 | SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, | |
1277 | GVN &gvn) { | |
1278 | // Check for the fully redundant, dominating load case. In this case, we can | |
1279 | // just use the dominating value directly. | |
1280 | if (ValuesPerBlock.size() == 1 && | |
1281 | gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, | |
1a4d82fc JJ |
1282 | LI->getParent())) { |
1283 | assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominates this block"); | |
223e47cc | 1284 | return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); |
1a4d82fc | 1285 | } |
223e47cc LB |
1286 | |
1287 | // Otherwise, we have to construct SSA form. | |
1288 | SmallVector<PHINode*, 8> NewPHIs; | |
1289 | SSAUpdater SSAUpdate(&NewPHIs); | |
1290 | SSAUpdate.Initialize(LI->getType(), LI->getName()); | |
1291 | ||
1292 | Type *LoadTy = LI->getType(); | |
1293 | ||
1294 | for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { | |
1295 | const AvailableValueInBlock &AV = ValuesPerBlock[i]; | |
1296 | BasicBlock *BB = AV.BB; | |
1297 | ||
1298 | if (SSAUpdate.HasValueForBlock(BB)) | |
1299 | continue; | |
1300 | ||
1301 | SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); | |
1302 | } | |
1303 | ||
1304 | // Perform PHI construction. | |
1305 | Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); | |
1306 | ||
1307 | // If new PHI nodes were created, notify alias analysis. | |
970d7e83 | 1308 | if (V->getType()->getScalarType()->isPointerTy()) { |
223e47cc LB |
1309 | AliasAnalysis *AA = gvn.getAliasAnalysis(); |
1310 | ||
1311 | for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) | |
1312 | AA->copyValue(LI, NewPHIs[i]); | |
1313 | ||
1314 | // Now that we've copied information to the new PHIs, scan through | |
1315 | // them again and inform alias analysis that we've added potentially | |
1316 | // escaping uses to any values that are operands to these PHIs. | |
1317 | for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { | |
1318 | PHINode *P = NewPHIs[i]; | |
1319 | for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { | |
1320 | unsigned jj = PHINode::getOperandNumForIncomingValue(ii); | |
1321 | AA->addEscapingUse(P->getOperandUse(jj)); | |
1322 | } | |
1323 | } | |
1324 | } | |
1325 | ||
1326 | return V; | |
1327 | } | |
1328 | ||
1a4d82fc JJ |
1329 | Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { |
1330 | Value *Res; | |
1331 | if (isSimpleValue()) { | |
1332 | Res = getSimpleValue(); | |
1333 | if (Res->getType() != LoadTy) { | |
1334 | const DataLayout *DL = gvn.getDataLayout(); | |
1335 | assert(DL && "Need target data to handle type mismatch case"); | |
1336 | Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), | |
1337 | *DL); | |
1338 | ||
1339 | DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " | |
1340 | << *getSimpleValue() << '\n' | |
1341 | << *Res << '\n' << "\n\n\n"); | |
1342 | } | |
1343 | } else if (isCoercedLoadValue()) { | |
1344 | LoadInst *Load = getCoercedLoadValue(); | |
1345 | if (Load->getType() == LoadTy && Offset == 0) { | |
1346 | Res = Load; | |
1347 | } else { | |
1348 | Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), | |
1349 | gvn); | |
1350 | ||
1351 | DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " | |
1352 | << *getCoercedLoadValue() << '\n' | |
1353 | << *Res << '\n' << "\n\n\n"); | |
1354 | } | |
1355 | } else if (isMemIntrinValue()) { | |
1356 | const DataLayout *DL = gvn.getDataLayout(); | |
1357 | assert(DL && "Need target data to handle type mismatch case"); | |
1358 | Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, | |
1359 | LoadTy, BB->getTerminator(), *DL); | |
1360 | DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset | |
1361 | << " " << *getMemIntrinValue() << '\n' | |
1362 | << *Res << '\n' << "\n\n\n"); | |
1363 | } else { | |
1364 | assert(isUndefValue() && "Should be UndefVal"); | |
1365 | DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); | |
1366 | return UndefValue::get(LoadTy); | |
1367 | } | |
1368 | return Res; | |
1369 | } | |
1370 | ||
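// --- Editor's illustrative sketch (not part of the pass) -------------------
// MaterializeAdjustedValue above dispatches on which of four forms the
// available value takes. A toy tagged-union mirror of that dispatch; the enum
// and function are hypothetical, not the real AvailableValueInBlock layout:
enum class AvailKind { Simple, CoercedLoad, MemIntrin, Undef };

const char *materializeStrategy(AvailKind K) {
  switch (K) {
  case AvailKind::Simple:      return "coerce stored value (GetStoreValueForLoad)";
  case AvailKind::CoercedLoad: return "re-extract bits (GetLoadValueForLoad)";
  case AvailKind::MemIntrin:   return "synthesize (GetMemInstValueForLoad)";
  case AvailKind::Undef:       return "materialize UndefValue for a dead block";
  }
  return "";
}
// ----------------------------------------------------------------------------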
223e47cc LB |
1371 | static bool isLifetimeStart(const Instruction *Inst) { |
1372 | if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) | |
1373 | return II->getIntrinsicID() == Intrinsic::lifetime_start; | |
1374 | return false; | |
1375 | } | |
1376 | ||
1a4d82fc JJ |
1377 | void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, |
1378 | AvailValInBlkVect &ValuesPerBlock, | |
1379 | UnavailBlkVect &UnavailableBlocks) { | |
223e47cc LB |
1380 | |
1381 | // Filter out useless results (non-locals, etc.). Keep track of the blocks | |
1382 | // where we have a value available in ValuesPerBlock, and of whether we see | |
1383 | // dependencies that produce an unknown value for the load (such as a call | |
1384 | // that could potentially clobber the load). | |
1a4d82fc | 1385 | unsigned NumDeps = Deps.size(); |
223e47cc LB |
1386 | for (unsigned i = 0, e = NumDeps; i != e; ++i) { |
1387 | BasicBlock *DepBB = Deps[i].getBB(); | |
1388 | MemDepResult DepInfo = Deps[i].getResult(); | |
1389 | ||
1a4d82fc JJ |
1390 | if (DeadBlocks.count(DepBB)) { |
1391 | // A dead dependent mem-op is disguised as a load evaluating the same | |
1392 | // value as the load in question. | |
1393 | ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); | |
1394 | continue; | |
1395 | } | |
1396 | ||
223e47cc LB |
1397 | if (!DepInfo.isDef() && !DepInfo.isClobber()) { |
1398 | UnavailableBlocks.push_back(DepBB); | |
1399 | continue; | |
1400 | } | |
1401 | ||
1402 | if (DepInfo.isClobber()) { | |
1403 | // The address being loaded in this non-local block may not be the same as | |
1404 | // the pointer operand of the load if PHI translation occurs. Make sure | |
1405 | // to consider the right address. | |
1406 | Value *Address = Deps[i].getAddress(); | |
1407 | ||
1408 | // If the dependence is to a store that writes to a superset of the bits | |
1409 | // read by the load, we can extract the bits we need for the load from the | |
1410 | // stored value. | |
1411 | if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { | |
1a4d82fc | 1412 | if (DL && Address) { |
223e47cc | 1413 | int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, |
1a4d82fc | 1414 | DepSI, *DL); |
223e47cc LB |
1415 | if (Offset != -1) { |
1416 | ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, | |
1417 | DepSI->getValueOperand(), | |
1418 | Offset)); | |
1419 | continue; | |
1420 | } | |
1421 | } | |
1422 | } | |
1423 | ||
1424 | // Check to see if we have something like this: | |
1425 | // load i32* P | |
1426 | // load i8* (P+1) | |
1427 | // if we have this, replace the latter with an extraction from the former. | |
1428 | if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { | |
1429 | // If this is a clobber and L is the first instruction in its block, then | |
1430 | // we have the first instruction in the entry block. | |
1a4d82fc JJ |
1431 | if (DepLI != LI && Address && DL) { |
1432 | int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, | |
1433 | DepLI, *DL); | |
223e47cc LB |
1434 | |
1435 | if (Offset != -1) { | |
1436 | ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, | |
1437 | Offset)); | |
1438 | continue; | |
1439 | } | |
1440 | } | |
1441 | } | |
1442 | ||
1443 | // If the clobbering value is a memset/memcpy/memmove, see if we can | |
1444 | // forward a value on from it. | |
1445 | if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { | |
1a4d82fc | 1446 | if (DL && Address) { |
223e47cc | 1447 | int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, |
1a4d82fc | 1448 | DepMI, *DL); |
223e47cc LB |
1449 | if (Offset != -1) { |
1450 | ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, | |
1451 | Offset)); | |
1452 | continue; | |
1453 | } | |
1454 | } | |
1455 | } | |
1456 | ||
1457 | UnavailableBlocks.push_back(DepBB); | |
1458 | continue; | |
1459 | } | |
1460 | ||
1461 | // From this point on, DepInfo.isDef() holds. | |
1462 | ||
1463 | Instruction *DepInst = DepInfo.getInst(); | |
1464 | ||
1465 | // Loading the allocation -> undef. | |
1466 | if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || | |
1467 | // Loading immediately after lifetime begin -> undef. | |
1468 | isLifetimeStart(DepInst)) { | |
1469 | ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, | |
1470 | UndefValue::get(LI->getType()))); | |
1471 | continue; | |
1472 | } | |
1473 | ||
1a4d82fc JJ |
1474 | // Loading from calloc (which zero initializes memory) -> zero |
1475 | if (isCallocLikeFn(DepInst, TLI)) { | |
1476 | ValuesPerBlock.push_back(AvailableValueInBlock::get( | |
1477 | DepBB, Constant::getNullValue(LI->getType()))); | |
1478 | continue; | |
1479 | } | |
1480 | ||
223e47cc LB |
1481 | if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { |
1482 | // Reject loads and stores that are to the same address but are of | |
1483 | // different types if we have to. | |
1484 | if (S->getValueOperand()->getType() != LI->getType()) { | |
1485 | // If the stored value is larger than or equal to the loaded value, we | |
1486 | // can reuse it. | |
1a4d82fc JJ |
1487 | if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), |
1488 | LI->getType(), *DL)) { | |
223e47cc LB |
1489 | UnavailableBlocks.push_back(DepBB); |
1490 | continue; | |
1491 | } | |
1492 | } | |
1493 | ||
1494 | ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, | |
1495 | S->getValueOperand())); | |
1496 | continue; | |
1497 | } | |
1498 | ||
1499 | if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { | |
1500 | // If the types mismatch and we can't handle it, reject reuse of the load. | |
1501 | if (LD->getType() != LI->getType()) { | |
1502 | // If the stored value is larger than or equal to the loaded value, we | |
1503 | // can reuse it. | |
1a4d82fc | 1504 | if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { |
223e47cc LB |
1505 | UnavailableBlocks.push_back(DepBB); |
1506 | continue; | |
1507 | } | |
1508 | } | |
1509 | ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); | |
1510 | continue; | |
1511 | } | |
1512 | ||
1513 | UnavailableBlocks.push_back(DepBB); | |
223e47cc | 1514 | } |
1a4d82fc | 1515 | } |
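// --- Editor's illustrative sketch (not part of the pass) -------------------
// The AnalyzeLoadFromClobbering{Store,Load,MemInst} calls above all answer
// the same shape of question: does the clobbering write fully cover the load,
// and if so, at what byte offset? A hedged standalone model of that contract
// (the real helpers also PHI-translate addresses and consult DataLayout):
#include <cstdint>

int coveredLoadOffset(int64_t StoreOff, uint64_t StoreSize,
                      int64_t LoadOff, uint64_t LoadSize) {
  // Both ranges are byte offsets from a common base pointer.
  bool Covered = StoreOff <= LoadOff &&
                 LoadOff + (int64_t)LoadSize <= StoreOff + (int64_t)StoreSize;
  return Covered ? (int)(LoadOff - StoreOff) : -1; // -1: cannot forward.
}
// ----------------------------------------------------------------------------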
223e47cc | 1516 | |
1a4d82fc JJ |
1517 | bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, |
1518 | UnavailBlkVect &UnavailableBlocks) { | |
223e47cc LB |
1519 | // Okay, we have *some* definitions of the value. This means that the value |
1520 | // is available in some of our (transitive) predecessors. Let's think about | |
1521 | // doing PRE of this load. This will involve inserting a new load into the | |
1522 | // predecessor when it's not available. We could do this in general, but | |
1523 | // prefer to not increase code size. As such, we only do this when we know | |
1524 | // that we only have to insert *one* load (which means we're basically moving | |
1525 | // the load, not inserting a new one). | |
1526 | ||
1527 | SmallPtrSet<BasicBlock *, 4> Blockers; | |
1528 | for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) | |
1529 | Blockers.insert(UnavailableBlocks[i]); | |
1530 | ||
1531 | // Let's find the first basic block with more than one predecessor. Walk | |
1532 | // backwards through predecessors if needed. | |
1533 | BasicBlock *LoadBB = LI->getParent(); | |
1534 | BasicBlock *TmpBB = LoadBB; | |
1535 | ||
223e47cc | 1536 | while (TmpBB->getSinglePredecessor()) { |
223e47cc LB |
1537 | TmpBB = TmpBB->getSinglePredecessor(); |
1538 | if (TmpBB == LoadBB) // Infinite (unreachable) loop. | |
1539 | return false; | |
1540 | if (Blockers.count(TmpBB)) | |
1541 | return false; | |
1542 | ||
1543 | // If any of these blocks has more than one successor (i.e. if the edge we | |
1544 | // just traversed was critical), then there are other paths through this | |
1545 | // block along which the load may not be anticipated. Hoisting the load | |
1546 | // above this block would be adding the load to execution paths along | |
1547 | // which it was not previously executed. | |
1548 | if (TmpBB->getTerminator()->getNumSuccessors() != 1) | |
1549 | return false; | |
1550 | } | |
1551 | ||
1552 | assert(TmpBB); | |
1553 | LoadBB = TmpBB; | |
1554 | ||
223e47cc LB |
1555 | // Check to see how many predecessors have the loaded value fully |
1556 | // available. | |
1a4d82fc | 1557 | MapVector<BasicBlock *, Value *> PredLoads; |
223e47cc LB |
1558 | DenseMap<BasicBlock*, char> FullyAvailableBlocks; |
1559 | for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) | |
1560 | FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; | |
1561 | for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) | |
1562 | FullyAvailableBlocks[UnavailableBlocks[i]] = false; | |
1563 | ||
1a4d82fc | 1564 | SmallVector<BasicBlock *, 4> CriticalEdgePred; |
223e47cc LB |
1565 | for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); |
1566 | PI != E; ++PI) { | |
1567 | BasicBlock *Pred = *PI; | |
1568 | if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { | |
1569 | continue; | |
1570 | } | |
223e47cc LB |
1571 | |
1572 | if (Pred->getTerminator()->getNumSuccessors() != 1) { | |
1573 | if (isa<IndirectBrInst>(Pred->getTerminator())) { | |
1574 | DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" | |
1575 | << Pred->getName() << "': " << *LI << '\n'); | |
1576 | return false; | |
1577 | } | |
1578 | ||
1579 | if (LoadBB->isLandingPad()) { | |
1580 | DEBUG(dbgs() | |
1581 | << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" | |
1582 | << Pred->getName() << "': " << *LI << '\n'); | |
1583 | return false; | |
1584 | } | |
1585 | ||
1a4d82fc JJ |
1586 | CriticalEdgePred.push_back(Pred); |
1587 | } else { | |
1588 | // Only add the predecessors that will not be split for now. | |
1589 | PredLoads[Pred] = nullptr; | |
223e47cc LB |
1590 | } |
1591 | } | |
1592 | ||
223e47cc | 1593 | // Decide whether PRE is profitable for this load. |
1a4d82fc | 1594 | unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); |
223e47cc | 1595 | assert(NumUnavailablePreds != 0 && |
1a4d82fc | 1596 | "Fully available value should already be eliminated!"); |
223e47cc LB |
1597 | |
1598 | // If this load is unavailable in multiple predecessors, reject it. | |
1599 | // FIXME: If we could restructure the CFG, we could make a common pred with | |
1600 | // all the preds that don't have an available LI and insert a new load into | |
1601 | // that one block. | |
1602 | if (NumUnavailablePreds != 1) | |
1603 | return false; | |
1604 | ||
1a4d82fc JJ |
1605 | // Split critical edges, and update the unavailable predecessors accordingly. |
1606 | for (BasicBlock *OrigPred : CriticalEdgePred) { | |
1607 | BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); | |
1608 | assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); | |
1609 | PredLoads[NewPred] = nullptr; | |
1610 | DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" | |
1611 | << LoadBB->getName() << '\n'); | |
1612 | } | |
1613 | ||
223e47cc LB |
1614 | // Check if the load can safely be moved to all the unavailable predecessors. |
1615 | bool CanDoPRE = true; | |
1616 | SmallVector<Instruction*, 8> NewInsts; | |
1a4d82fc JJ |
1617 | for (auto &PredLoad : PredLoads) { |
1618 | BasicBlock *UnavailablePred = PredLoad.first; | |
223e47cc LB |
1619 | |
1620 | // Do PHI translation to get its value in the predecessor if necessary. The | |
1621 | // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. | |
1622 | ||
1623 | // If all preds have a single successor, then we know it is safe to insert | |
1624 | // the load on the pred (?!?), so we can insert code to materialize the | |
1625 | // pointer if it is not available. | |
85aaf69f | 1626 | PHITransAddr Address(LI->getPointerOperand(), DL, AC); |
1a4d82fc JJ |
1627 | Value *LoadPtr = nullptr; |
1628 | LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, | |
1629 | *DT, NewInsts); | |
223e47cc LB |
1630 | |
1631 | // If we couldn't find or insert a computation of this phi translated value, | |
1632 | // we fail PRE. | |
1a4d82fc | 1633 | if (!LoadPtr) { |
223e47cc LB |
1634 | DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " |
1635 | << *LI->getPointerOperand() << "\n"); | |
1636 | CanDoPRE = false; | |
1637 | break; | |
1638 | } | |
1639 | ||
1a4d82fc | 1640 | PredLoad.second = LoadPtr; |
223e47cc LB |
1641 | } |
1642 | ||
1643 | if (!CanDoPRE) { | |
1644 | while (!NewInsts.empty()) { | |
1645 | Instruction *I = NewInsts.pop_back_val(); | |
1646 | if (MD) MD->removeInstruction(I); | |
1647 | I->eraseFromParent(); | |
1648 | } | |
1a4d82fc JJ |
1649 | // HINT: Don't revert the edge-splitting, as a following transformation | |
1650 | // may also need to split these critical edges. | |
1651 | return !CriticalEdgePred.empty(); | |
223e47cc LB |
1652 | } |
1653 | ||
1654 | // Okay, we can eliminate this load by inserting a reload in the predecessor | |
1655 | // and using PHI construction to get the value in the other predecessors, do | |
1656 | // it. | |
1657 | DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); | |
1658 | DEBUG(if (!NewInsts.empty()) | |
1659 | dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " | |
1660 | << *NewInsts.back() << '\n'); | |
1661 | ||
1662 | // Assign value numbers to the new instructions. | |
1663 | for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { | |
1664 | // FIXME: We really _ought_ to insert these value numbers into their | |
1665 | // parent's availability map. However, in doing so, we risk getting into | |
1666 | // ordering issues. If a block hasn't been processed yet, we would be | |
1667 | // marking a value as AVAIL-IN, which isn't what we intend. | |
1668 | VN.lookup_or_add(NewInsts[i]); | |
1669 | } | |
1670 | ||
1a4d82fc JJ |
1671 | for (const auto &PredLoad : PredLoads) { |
1672 | BasicBlock *UnavailablePred = PredLoad.first; | |
1673 | Value *LoadPtr = PredLoad.second; | |
223e47cc LB |
1674 | |
1675 | Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, | |
1676 | LI->getAlignment(), | |
1677 | UnavailablePred->getTerminator()); | |
1678 | ||
1a4d82fc JJ |
1679 | // Transfer the old load's AA tags to the new load. |
1680 | AAMDNodes Tags; | |
1681 | LI->getAAMetadata(Tags); | |
1682 | if (Tags) | |
1683 | NewLoad->setAAMetadata(Tags); | |
223e47cc LB |
1684 | |
1685 | // Transfer DebugLoc. | |
1686 | NewLoad->setDebugLoc(LI->getDebugLoc()); | |
1687 | ||
1688 | // Add the newly created load. | |
1689 | ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, | |
1690 | NewLoad)); | |
1691 | MD->invalidateCachedPointerInfo(LoadPtr); | |
1692 | DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); | |
1693 | } | |
1694 | ||
1695 | // Perform PHI construction. | |
1696 | Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); | |
1697 | LI->replaceAllUsesWith(V); | |
1698 | if (isa<PHINode>(V)) | |
1699 | V->takeName(LI); | |
970d7e83 | 1700 | if (V->getType()->getScalarType()->isPointerTy()) |
223e47cc LB |
1701 | MD->invalidateCachedPointerInfo(V); |
1702 | markInstructionForDeletion(LI); | |
1703 | ++NumPRELoad; | |
1704 | return true; | |
1705 | } | |
1706 | ||
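// --- Editor's illustrative sketch (not part of the pass) -------------------
// The profitability gate in PerformLoadPRE above reduces to: PRE fires only
// when exactly one predecessor (counting critical edges still to be split)
// lacks the value, so the load is moved rather than duplicated. A toy
// restatement with hypothetical inputs:
bool worthLoadPRE(unsigned NumPlainUnavailPreds, unsigned NumCriticalEdgePreds) {
  unsigned NumUnavailable = NumPlainUnavailPreds + NumCriticalEdgePreds;
  // NumUnavailable == 0 would mean the load was fully redundant and should
  // have been eliminated before reaching PRE.
  return NumUnavailable == 1;
}
// ----------------------------------------------------------------------------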
1a4d82fc JJ |
1707 | /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are |
1708 | /// non-local by performing PHI construction. | |
1709 | bool GVN::processNonLocalLoad(LoadInst *LI) { | |
1710 | // Step 1: Find the non-local dependencies of the load. | |
1711 | LoadDepVect Deps; | |
85aaf69f | 1712 | MD->getNonLocalPointerDependency(LI, Deps); |
1a4d82fc JJ |
1713 | |
1714 | // If we had to process more than one hundred blocks to find the | |
1715 | // dependencies, this load isn't worth worrying about. Optimizing | |
1716 | // it will be too expensive. | |
1717 | unsigned NumDeps = Deps.size(); | |
1718 | if (NumDeps > 100) | |
1719 | return false; | |
1720 | ||
1721 | // If we had a phi translation failure, we'll have a single entry which is a | |
1722 | // clobber in the current block. Reject this early. | |
1723 | if (NumDeps == 1 && | |
1724 | !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { | |
1725 | DEBUG( | |
1726 | dbgs() << "GVN: non-local load "; | |
1727 | LI->printAsOperand(dbgs()); | |
1728 | dbgs() << " has unknown dependencies\n"; | |
1729 | ); | |
1730 | return false; | |
1731 | } | |
1732 | ||
85aaf69f SL |
1733 | // If this load follows a GEP, see if we can PRE the indices before analyzing. |
1734 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) { | |
1735 | for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), | |
1736 | OE = GEP->idx_end(); | |
1737 | OI != OE; ++OI) | |
1738 | if (Instruction *I = dyn_cast<Instruction>(OI->get())) | |
1739 | performScalarPRE(I); | |
1740 | } | |
1741 | ||
1a4d82fc JJ |
1742 | // Step 2: Analyze the availability of the load |
1743 | AvailValInBlkVect ValuesPerBlock; | |
1744 | UnavailBlkVect UnavailableBlocks; | |
1745 | AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); | |
1746 | ||
1747 | // If we have no predecessors that produce a known value for this load, exit | |
1748 | // early. | |
1749 | if (ValuesPerBlock.empty()) | |
1750 | return false; | |
1751 | ||
1752 | // Step 3: Eliminate full redundancy. | |
1753 | // | |
1754 | // If all of the instructions we depend on produce a known value for this | |
1755 | // load, then it is fully redundant and we can use PHI insertion to compute | |
1756 | // its value. Insert PHIs and remove the fully redundant value now. | |
1757 | if (UnavailableBlocks.empty()) { | |
1758 | DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); | |
1759 | ||
1760 | // Perform PHI construction. | |
1761 | Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); | |
1762 | LI->replaceAllUsesWith(V); | |
1763 | ||
1764 | if (isa<PHINode>(V)) | |
1765 | V->takeName(LI); | |
1766 | if (V->getType()->getScalarType()->isPointerTy()) | |
1767 | MD->invalidateCachedPointerInfo(V); | |
1768 | markInstructionForDeletion(LI); | |
1769 | ++NumGVNLoad; | |
1770 | return true; | |
1771 | } | |
1772 | ||
1773 | // Step 4: Eliminate partial redundancy. | |
1774 | if (!EnablePRE || !EnableLoadPRE) | |
1775 | return false; | |
1776 | ||
1777 | return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); | |
1778 | } | |
1779 | ||
1780 | ||
970d7e83 | 1781 | static void patchReplacementInstruction(Instruction *I, Value *Repl) { |
223e47cc LB |
1782 | // Patch the replacement so that it is not more restrictive than the value |
1783 | // being replaced. | |
1784 | BinaryOperator *Op = dyn_cast<BinaryOperator>(I); | |
1785 | BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl); | |
1786 | if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) && | |
1787 | isa<OverflowingBinaryOperator>(ReplOp)) { | |
1788 | if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap()) | |
1789 | ReplOp->setHasNoSignedWrap(false); | |
1790 | if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap()) | |
1791 | ReplOp->setHasNoUnsignedWrap(false); | |
1792 | } | |
1793 | if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) { | |
1a4d82fc JJ |
1794 | // FIXME: If both the original and replacement value are part of the |
1795 | // same control-flow region (meaning that the execution of one | |
1796 | // guarantees the execution of the other), then we can combine the | |
1797 | // noalias scopes here and do better than the general conservative | |
1798 | // answer used in combineMetadata(). | |
1799 | ||
1800 | // In general, GVN unifies expressions over different control-flow | |
1801 | // regions, and so we need a conservative combination of the noalias | |
1802 | // scopes. | |
1803 | unsigned KnownIDs[] = { | |
1804 | LLVMContext::MD_tbaa, | |
1805 | LLVMContext::MD_alias_scope, | |
1806 | LLVMContext::MD_noalias, | |
1807 | LLVMContext::MD_range, | |
1808 | LLVMContext::MD_fpmath, | |
1809 | LLVMContext::MD_invariant_load, | |
1810 | }; | |
1811 | combineMetadata(ReplInst, I, KnownIDs); | |
223e47cc LB |
1812 | } |
1813 | } | |
1814 | ||
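// --- Editor's illustrative sketch (not part of the pass) -------------------
// patchReplacementInstruction above intersects poison-generating flags: the
// surviving instruction may keep nsw/nuw only if the instruction it replaces
// also had them. The same rule on plain structs (WrapFlags is hypothetical):
struct WrapFlags { bool NSW = false, NUW = false; };

WrapFlags intersectWrapFlags(WrapFlags Orig, WrapFlags Repl) {
  Repl.NSW &= Orig.NSW; // Keep "no signed wrap" only if both had it.
  Repl.NUW &= Orig.NUW; // Keep "no unsigned wrap" only if both had it.
  return Repl;
}
// ----------------------------------------------------------------------------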
970d7e83 LB |
1815 | static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { |
1816 | patchReplacementInstruction(I, Repl); | |
223e47cc LB |
1817 | I->replaceAllUsesWith(Repl); |
1818 | } | |
1819 | ||
1820 | /// processLoad - Attempt to eliminate a load, first by eliminating it | |
1821 | /// locally, and then attempting non-local elimination if that fails. | |
1822 | bool GVN::processLoad(LoadInst *L) { | |
1823 | if (!MD) | |
1824 | return false; | |
1825 | ||
1826 | if (!L->isSimple()) | |
1827 | return false; | |
1828 | ||
1829 | if (L->use_empty()) { | |
1830 | markInstructionForDeletion(L); | |
1831 | return true; | |
1832 | } | |
1833 | ||
1834 | // ... to a pointer that has been loaded from before... | |
1835 | MemDepResult Dep = MD->getDependency(L); | |
1836 | ||
1837 | // If we have a clobber and target data is around, see if this is a clobber | |
1838 | // that we can fix up through code synthesis. | |
1a4d82fc | 1839 | if (Dep.isClobber() && DL) { |
223e47cc LB |
1840 | // Check to see if we have something like this: |
1841 | // store i32 123, i32* %P | |
1842 | // %A = bitcast i32* %P to i8* | |
1843 | // %B = gep i8* %A, i32 1 | |
1844 | // %C = load i8* %B | |
1845 | // | |
1846 | // We could do that by recognizing if the clobber instructions are obviously | |
1847 | // a common base + constant offset, and if the previous store (or memset) | |
1848 | // completely covers this load. This sort of thing can happen in bitfield | |
1849 | // access code. | |
1a4d82fc | 1850 | Value *AvailVal = nullptr; |
223e47cc LB |
1851 | if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { |
1852 | int Offset = AnalyzeLoadFromClobberingStore(L->getType(), | |
1853 | L->getPointerOperand(), | |
1a4d82fc | 1854 | DepSI, *DL); |
223e47cc LB |
1855 | if (Offset != -1) |
1856 | AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, | |
1a4d82fc | 1857 | L->getType(), L, *DL); |
223e47cc LB |
1858 | } |
1859 | ||
1860 | // Check to see if we have something like this: | |
1861 | // load i32* P | |
1862 | // load i8* (P+1) | |
1863 | // if we have this, replace the latter with an extraction from the former. | |
1864 | if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { | |
1865 | // If this is a clobber and L is the first instruction in its block, then | |
1866 | // we have the first instruction in the entry block. | |
1867 | if (DepLI == L) | |
1868 | return false; | |
1869 | ||
1870 | int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), | |
1871 | L->getPointerOperand(), | |
1a4d82fc | 1872 | DepLI, *DL); |
223e47cc LB |
1873 | if (Offset != -1) |
1874 | AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); | |
1875 | } | |
1876 | ||
1877 | // If the clobbering value is a memset/memcpy/memmove, see if we can forward | |
1878 | // a value on from it. | |
1879 | if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { | |
1880 | int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), | |
1881 | L->getPointerOperand(), | |
1a4d82fc | 1882 | DepMI, *DL); |
223e47cc | 1883 | if (Offset != -1) |
1a4d82fc | 1884 | AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL); |
223e47cc LB |
1885 | } |
1886 | ||
1887 | if (AvailVal) { | |
1888 | DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' | |
1889 | << *AvailVal << '\n' << *L << "\n\n\n"); | |
1890 | ||
1891 | // Replace the load! | |
1892 | L->replaceAllUsesWith(AvailVal); | |
970d7e83 | 1893 | if (AvailVal->getType()->getScalarType()->isPointerTy()) |
223e47cc LB |
1894 | MD->invalidateCachedPointerInfo(AvailVal); |
1895 | markInstructionForDeletion(L); | |
1896 | ++NumGVNLoad; | |
1897 | return true; | |
1898 | } | |
1899 | } | |
1900 | ||
1901 | // If the value isn't available, don't do anything! | |
1902 | if (Dep.isClobber()) { | |
1903 | DEBUG( | |
1904 | // Fast-print the dep; using operator<< on an instruction is too slow. | |
1905 | dbgs() << "GVN: load "; | |
1a4d82fc | 1906 | L->printAsOperand(dbgs()); |
223e47cc LB |
1907 | Instruction *I = Dep.getInst(); |
1908 | dbgs() << " is clobbered by " << *I << '\n'; | |
1909 | ); | |
1910 | return false; | |
1911 | } | |
1912 | ||
1913 | // If it is defined in another block, try harder. | |
1914 | if (Dep.isNonLocal()) | |
1915 | return processNonLocalLoad(L); | |
1916 | ||
1917 | if (!Dep.isDef()) { | |
1918 | DEBUG( | |
1919 | // fast print dep, using operator<< on instruction is too slow. | |
1920 | dbgs() << "GVN: load "; | |
1a4d82fc | 1921 | L->printAsOperand(dbgs()); |
223e47cc LB |
1922 | dbgs() << " has unknown dependence\n"; |
1923 | ); | |
1924 | return false; | |
1925 | } | |
1926 | ||
1927 | Instruction *DepInst = Dep.getInst(); | |
1928 | if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { | |
1929 | Value *StoredVal = DepSI->getValueOperand(); | |
1930 | ||
1931 | // The store and load are to a must-aliased pointer, but they may not | |
1932 | // actually have the same type. See if we know how to reuse the stored | |
1933 | // value (depending on its type). | |
1934 | if (StoredVal->getType() != L->getType()) { | |
1a4d82fc | 1935 | if (DL) { |
223e47cc | 1936 | StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), |
1a4d82fc JJ |
1937 | L, *DL); |
1938 | if (!StoredVal) | |
223e47cc LB |
1939 | return false; |
1940 | ||
1941 | DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal | |
1942 | << '\n' << *L << "\n\n\n"); | |
1943 | } | |
1944 | else | |
1945 | return false; | |
1946 | } | |
1947 | ||
1948 | // Remove it! | |
1949 | L->replaceAllUsesWith(StoredVal); | |
970d7e83 | 1950 | if (StoredVal->getType()->getScalarType()->isPointerTy()) |
223e47cc LB |
1951 | MD->invalidateCachedPointerInfo(StoredVal); |
1952 | markInstructionForDeletion(L); | |
1953 | ++NumGVNLoad; | |
1954 | return true; | |
1955 | } | |
1956 | ||
1957 | if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { | |
1958 | Value *AvailableVal = DepLI; | |
1959 | ||
1960 | // The loads are of a must-aliased pointer, but they may not actually have | |
1961 | // the same type. See if we know how to reuse the previously loaded value | |
1962 | // (depending on its type). | |
1963 | if (DepLI->getType() != L->getType()) { | |
1a4d82fc | 1964 | if (DL) { |
223e47cc | 1965 | AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), |
1a4d82fc JJ |
1966 | L, *DL); |
1967 | if (!AvailableVal) | |
223e47cc LB |
1968 | return false; |
1969 | ||
1970 | DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal | |
1971 | << "\n" << *L << "\n\n\n"); | |
1972 | } | |
1973 | else | |
1974 | return false; | |
1975 | } | |
1976 | ||
1977 | // Remove it! | |
970d7e83 LB |
1978 | patchAndReplaceAllUsesWith(L, AvailableVal); |
1979 | if (DepLI->getType()->getScalarType()->isPointerTy()) | |
223e47cc LB |
1980 | MD->invalidateCachedPointerInfo(DepLI); |
1981 | markInstructionForDeletion(L); | |
1982 | ++NumGVNLoad; | |
1983 | return true; | |
1984 | } | |
1985 | ||
1986 | // If this load really doesn't depend on anything, then we must be loading an | |
1987 | // undef value. This can happen when loading from a fresh allocation with no | |
1988 | // intervening stores, for example. | |
1989 | if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { | |
1990 | L->replaceAllUsesWith(UndefValue::get(L->getType())); | |
1991 | markInstructionForDeletion(L); | |
1992 | ++NumGVNLoad; | |
1993 | return true; | |
1994 | } | |
1995 | ||
1996 | // If this load occurs right after a lifetime begin, | |
1997 | // then the loaded value is undefined. | |
1998 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { | |
1999 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) { | |
2000 | L->replaceAllUsesWith(UndefValue::get(L->getType())); | |
2001 | markInstructionForDeletion(L); | |
2002 | ++NumGVNLoad; | |
2003 | return true; | |
2004 | } | |
2005 | } | |
2006 | ||
1a4d82fc JJ |
2007 | // If this load follows a calloc (which zero initializes memory), |
2008 | // then the loaded value is zero | |
2009 | if (isCallocLikeFn(DepInst, TLI)) { | |
2010 | L->replaceAllUsesWith(Constant::getNullValue(L->getType())); | |
2011 | markInstructionForDeletion(L); | |
2012 | ++NumGVNLoad; | |
2013 | return true; | |
2014 | } | |
2015 | ||
223e47cc LB |
2016 | return false; |
2017 | } | |
2018 | ||
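// --- Editor's illustrative sketch (not part of the pass) -------------------
// Worked instance of the clobber case in processLoad above: after
// "store i32 %v, i32* %P", a load of i8 from (P+1) can be answered from the
// stored value itself. On a little-endian target the extraction is a shift
// and truncate (the real GetStoreValueForLoad asks DataLayout which end to
// shift from):
#include <cstdint>

uint8_t extractByteLE(uint32_t Stored, unsigned Offset) {
  return (uint8_t)(Stored >> (Offset * 8)); // Offset in [0, 3].
}
// ----------------------------------------------------------------------------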
2019 | // findLeader - In order to find a leader for a given value number at a | |
2020 | // specific basic block, we first obtain the list of all Values for that number, | |
2021 | // and then scan the list to find one whose block dominates the block in | |
2022 | // question. This is fast because dominator tree queries consist of only | |
2023 | // a few comparisons of DFS numbers. | |
2024 | Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { | |
2025 | LeaderTableEntry Vals = LeaderTable[num]; | |
1a4d82fc | 2026 | if (!Vals.Val) return nullptr; |
223e47cc | 2027 | |
1a4d82fc | 2028 | Value *Val = nullptr; |
223e47cc LB |
2029 | if (DT->dominates(Vals.BB, BB)) { |
2030 | Val = Vals.Val; | |
2031 | if (isa<Constant>(Val)) return Val; | |
2032 | } | |
2033 | ||
2034 | LeaderTableEntry* Next = Vals.Next; | |
2035 | while (Next) { | |
2036 | if (DT->dominates(Next->BB, BB)) { | |
2037 | if (isa<Constant>(Next->Val)) return Next->Val; | |
2038 | if (!Val) Val = Next->Val; | |
2039 | } | |
2040 | ||
2041 | Next = Next->Next; | |
2042 | } | |
2043 | ||
2044 | return Val; | |
2045 | } | |
2046 | ||
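// --- Editor's illustrative sketch (not part of the pass) -------------------
// findLeader above scans a per-value-number chain for an entry whose block
// dominates the query block, returning a Constant as soon as one is found
// (constants make the best leaders) and otherwise the first dominating value.
// A toy version with a pluggable dominance oracle; LeaderEntry stands in for
// the real LeaderTableEntry:
#include <functional>

struct LeaderEntry {
  void *Val;          // Value* in the real pass.
  void *BB;           // Block in which Val is available.
  bool IsConstant;
  LeaderEntry *Next;  // Next candidate for the same value number.
};

void *findLeaderSketch(LeaderEntry *Vals, void *QueryBB,
                       const std::function<bool(void *, void *)> &dominates) {
  void *Found = nullptr;
  for (LeaderEntry *E = Vals; E; E = E->Next) {
    if (!dominates(E->BB, QueryBB))
      continue;
    if (E->IsConstant)
      return E->Val;  // A constant leader beats any instruction leader.
    if (!Found)
      Found = E->Val; // Remember the first dominating candidate.
  }
  return Found;
}
// ----------------------------------------------------------------------------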
2047 | /// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the | |
2048 | /// use is dominated by the given basic block. Returns the number of uses that | |
2049 | /// were replaced. | |
2050 | unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, | |
2051 | const BasicBlockEdge &Root) { | |
2052 | unsigned Count = 0; | |
2053 | for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); | |
2054 | UI != UE; ) { | |
1a4d82fc | 2055 | Use &U = *UI++; |
223e47cc LB |
2056 | |
2057 | if (DT->dominates(Root, U)) { | |
2058 | U.set(To); | |
2059 | ++Count; | |
2060 | } | |
2061 | } | |
2062 | return Count; | |
2063 | } | |
2064 | ||
2065 | /// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return | |
2066 | /// true if every path from the entry block to 'Dst' passes via this edge. In | |
2067 | /// particular 'Dst' must not be reachable via another edge from 'Src'. | |
2068 | static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, | |
2069 | DominatorTree *DT) { | |
2070 | // While in theory it is interesting to consider the case in which Dst has | |
2071 | // more than one predecessor, because Dst might be part of a loop which is | |
2072 | // only reachable from Src, in practice it is pointless since at the time | |
2073 | // GVN runs all such loops have preheaders, which means that Dst will have | |
2074 | // been changed to have only one predecessor, namely Src. | |
2075 | const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); | |
2076 | const BasicBlock *Src = E.getStart(); | |
2077 | assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); | |
2078 | (void)Src; | |
1a4d82fc | 2079 | return Pred != nullptr; |
223e47cc LB |
2080 | } |
2081 | ||
2082 | /// propagateEquality - The given values are known to be equal in every block | |
2083 | /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with | |
2084 | /// 'RHS' everywhere in the scope. Returns whether a change was made. | |
2085 | bool GVN::propagateEquality(Value *LHS, Value *RHS, | |
2086 | const BasicBlockEdge &Root) { | |
2087 | SmallVector<std::pair<Value*, Value*>, 4> Worklist; | |
2088 | Worklist.push_back(std::make_pair(LHS, RHS)); | |
2089 | bool Changed = false; | |
2090 | // For speed, compute a conservative fast approximation to | |
2091 | // DT->dominates(Root, Root.getEnd()); | |
2092 | bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); | |
2093 | ||
2094 | while (!Worklist.empty()) { | |
2095 | std::pair<Value*, Value*> Item = Worklist.pop_back_val(); | |
2096 | LHS = Item.first; RHS = Item.second; | |
2097 | ||
2098 | if (LHS == RHS) continue; | |
2099 | assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); | |
2100 | ||
2101 | // Don't try to propagate equalities between constants. | |
2102 | if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue; | |
2103 | ||
2104 | // Prefer a constant on the right-hand side, or an Argument if no constants. | |
2105 | if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) | |
2106 | std::swap(LHS, RHS); | |
2107 | assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); | |
2108 | ||
85aaf69f SL |
2109 | // If there is no obvious reason to prefer the left-hand side over the |
2110 | // right-hand side, ensure the longest lived term is on the right-hand side, | |
2111 | // so the shortest lived term will be replaced by the longest lived. | |
2112 | // This tends to expose more simplifications. | |
223e47cc LB |
2113 | uint32_t LVN = VN.lookup_or_add(LHS); |
2114 | if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || | |
2115 | (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { | |
85aaf69f SL |
2116 | // Move the 'oldest' value to the right-hand side, using the value number |
2117 | // as a proxy for age. | |
223e47cc LB |
2118 | uint32_t RVN = VN.lookup_or_add(RHS); |
2119 | if (LVN < RVN) { | |
2120 | std::swap(LHS, RHS); | |
2121 | LVN = RVN; | |
2122 | } | |
2123 | } | |
2124 | ||
2125 | // If value numbering later sees that an instruction in the scope is equal | |
2126 | // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve | |
2127 | // the invariant that instructions only occur in the leader table for their | |
2128 | // own value number (this is used by removeFromLeaderTable), do not do this | |
2129 | // if RHS is an instruction (if an instruction in the scope is morphed into | |
2130 | // LHS then it will be turned into RHS by the next GVN iteration anyway, so | |
2131 | // using the leader table is about compiling faster, not optimizing better). | |
2132 | // The leader table only tracks basic blocks, not edges. Only add to if we | |
2133 | // have the simple case where the edge dominates the end. | |
2134 | if (RootDominatesEnd && !isa<Instruction>(RHS)) | |
2135 | addToLeaderTable(LVN, RHS, Root.getEnd()); | |
2136 | ||
2137 | // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As | |
2138 | // LHS always has at least one use that is not dominated by Root, this will | |
2139 | // never do anything if LHS has only one use. | |
2140 | if (!LHS->hasOneUse()) { | |
2141 | unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); | |
2142 | Changed |= NumReplacements > 0; | |
2143 | NumGVNEqProp += NumReplacements; | |
2144 | } | |
2145 | ||
85aaf69f SL |
2146 | // Now try to deduce additional equalities from this one. For example, if |
2147 | // the known equality was "(A != B)" == "false" then it follows that A and B | |
2148 | // are equal in the scope. Only boolean equalities with an explicit true or | |
2149 | // false RHS are currently supported. | |
223e47cc LB |
2150 | if (!RHS->getType()->isIntegerTy(1)) |
2151 | // Not a boolean equality - bail out. | |
2152 | continue; | |
2153 | ConstantInt *CI = dyn_cast<ConstantInt>(RHS); | |
2154 | if (!CI) | |
2155 | // RHS neither 'true' nor 'false' - bail out. | |
2156 | continue; | |
2157 | // Whether RHS equals 'true'. Otherwise it equals 'false'. | |
2158 | bool isKnownTrue = CI->isAllOnesValue(); | |
2159 | bool isKnownFalse = !isKnownTrue; | |
2160 | ||
2161 | // If "A && B" is known true then both A and B are known true. If "A || B" | |
2162 | // is known false then both A and B are known false. | |
2163 | Value *A, *B; | |
2164 | if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || | |
2165 | (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { | |
2166 | Worklist.push_back(std::make_pair(A, RHS)); | |
2167 | Worklist.push_back(std::make_pair(B, RHS)); | |
2168 | continue; | |
2169 | } | |
2170 | ||
2171 | // If we are propagating an equality like "(A == B)" == "true" then also | |
2172 | // propagate the equality A == B. When propagating a comparison such as | |
2173 | // "(A >= B)" == "true", replace all instances of "A < B" with "false". | |
85aaf69f | 2174 | if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { |
223e47cc LB |
2175 | Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); |
2176 | ||
2177 | // If "A == B" is known true, or "A != B" is known false, then replace | |
2178 | // A with B everywhere in the scope. | |
2179 | if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || | |
2180 | (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) | |
2181 | Worklist.push_back(std::make_pair(Op0, Op1)); | |
2182 | ||
85aaf69f SL |
2183 | // Handle the floating point versions of equality comparisons too. |
2184 | if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || | |
2185 | (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { | |
2186 | // Floating point -0.0 and 0.0 compare equal, so we can't | |
2187 | // propagate a constant based on that comparison. | |
2188 | // FIXME: We should do this optimization if 'no signed zeros' is | |
2189 | // applicable via an instruction-level fast-math-flag or some other | |
2190 | // indicator that relaxed FP semantics are being used. | |
2191 | if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero()) | |
2192 | Worklist.push_back(std::make_pair(Op0, Op1)); | |
2193 | } | |
2194 | ||
223e47cc LB |
2195 | // If "A >= B" is known true, replace "A < B" with false everywhere. |
2196 | CmpInst::Predicate NotPred = Cmp->getInversePredicate(); | |
2197 | Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); | |
85aaf69f SL |
2198 | // Since we don't have the instruction "A < B" immediately to hand, work |
2199 | // out the value number that it would have and use that to find an | |
2200 | // appropriate instruction (if any). | |
223e47cc LB |
2201 | uint32_t NextNum = VN.getNextUnusedValueNumber(); |
2202 | uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); | |
2203 | // If the number we were assigned was brand new then there is no point in | |
2204 | // looking for an instruction realizing it: there cannot be one! | |
2205 | if (Num < NextNum) { | |
2206 | Value *NotCmp = findLeader(Root.getEnd(), Num); | |
2207 | if (NotCmp && isa<Instruction>(NotCmp)) { | |
2208 | unsigned NumReplacements = | |
2209 | replaceAllDominatedUsesWith(NotCmp, NotVal, Root); | |
2210 | Changed |= NumReplacements > 0; | |
2211 | NumGVNEqProp += NumReplacements; | |
2212 | } | |
2213 | } | |
2214 | // Ensure that any instruction in scope that gets the "A < B" value number | |
2215 | // is replaced with false. | |
2216 | // The leader table only tracks basic blocks, not edges. Only add to if we | |
2217 | // have the simple case where the edge dominates the end. | |
2218 | if (RootDominatesEnd) | |
2219 | addToLeaderTable(Num, NotVal, Root.getEnd()); | |
2220 | ||
2221 | continue; | |
2222 | } | |
2223 | } | |
2224 | ||
2225 | return Changed; | |
2226 | } | |
2227 | ||
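// --- Editor's illustrative sketch (not part of the pass) -------------------
// The worklist in propagateEquality above derives new facts from boolean
// structure: if "A && B" is known true, both operands are true; if "A || B"
// is known false, both are false. A toy model on a tiny expression tree
// (Expr is hypothetical, standing in for Value):
#include <utility>
#include <vector>

struct Expr {
  enum Kind { Leaf, And, Or } K = Leaf;
  Expr *L = nullptr, *R = nullptr; // Both non-null when K is And or Or.
  bool Known = false, KnownVal = false;
};

void propagateBool(Expr *Root, bool RootVal) {
  std::vector<std::pair<Expr *, bool>> Worklist{{Root, RootVal}};
  while (!Worklist.empty()) {
    auto [E, V] = Worklist.back();
    Worklist.pop_back();
    E->Known = true;
    E->KnownVal = V;
    // "A && B" == true forces both true; "A || B" == false forces both false.
    if ((E->K == Expr::And && V) || (E->K == Expr::Or && !V)) {
      Worklist.push_back({E->L, V});
      Worklist.push_back({E->R, V});
    }
  }
}
// ----------------------------------------------------------------------------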
2228 | /// processInstruction - When calculating availability, handle an instruction | |
2229 | /// by inserting it into the appropriate sets. | |
2230 | bool GVN::processInstruction(Instruction *I) { | |
2231 | // Ignore dbg info intrinsics. | |
2232 | if (isa<DbgInfoIntrinsic>(I)) | |
2233 | return false; | |
2234 | ||
2235 | // If the instruction can be easily simplified then do so now in preference | |
2236 | // to value numbering it. Value numbering often exposes redundancies, for | |
2237 | // example if it determines that %y is equal to %x then the instruction | |
2238 | // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. | |
85aaf69f | 2239 | if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { |
223e47cc | 2240 | I->replaceAllUsesWith(V); |
970d7e83 | 2241 | if (MD && V->getType()->getScalarType()->isPointerTy()) |
223e47cc LB |
2242 | MD->invalidateCachedPointerInfo(V); |
2243 | markInstructionForDeletion(I); | |
2244 | ++NumGVNSimpl; | |
2245 | return true; | |
2246 | } | |
2247 | ||
2248 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |
2249 | if (processLoad(LI)) | |
2250 | return true; | |
2251 | ||
2252 | unsigned Num = VN.lookup_or_add(LI); | |
2253 | addToLeaderTable(Num, LI, LI->getParent()); | |
2254 | return false; | |
2255 | } | |
2256 | ||
2257 | // For conditional branches, we can perform simple conditional propagation on | |
2258 | // the condition value itself. | |
2259 | if (BranchInst *BI = dyn_cast<BranchInst>(I)) { | |
1a4d82fc | 2260 | if (!BI->isConditional()) |
223e47cc LB |
2261 | return false; |
2262 | ||
1a4d82fc JJ |
2263 | if (isa<Constant>(BI->getCondition())) |
2264 | return processFoldableCondBr(BI); | |
223e47cc | 2265 | |
1a4d82fc | 2266 | Value *BranchCond = BI->getCondition(); |
223e47cc LB |
2267 | BasicBlock *TrueSucc = BI->getSuccessor(0); |
2268 | BasicBlock *FalseSucc = BI->getSuccessor(1); | |
2269 | // Avoid multiple edges early. | |
2270 | if (TrueSucc == FalseSucc) | |
2271 | return false; | |
2272 | ||
2273 | BasicBlock *Parent = BI->getParent(); | |
2274 | bool Changed = false; | |
2275 | ||
2276 | Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); | |
2277 | BasicBlockEdge TrueE(Parent, TrueSucc); | |
2278 | Changed |= propagateEquality(BranchCond, TrueVal, TrueE); | |
2279 | ||
2280 | Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); | |
2281 | BasicBlockEdge FalseE(Parent, FalseSucc); | |
2282 | Changed |= propagateEquality(BranchCond, FalseVal, FalseE); | |
2283 | ||
2284 | return Changed; | |
2285 | } | |
2286 | ||
2287 | // For switches, propagate the case values into the case destinations. | |
2288 | if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { | |
2289 | Value *SwitchCond = SI->getCondition(); | |
2290 | BasicBlock *Parent = SI->getParent(); | |
2291 | bool Changed = false; | |
2292 | ||
2293 | // Remember how many outgoing edges there are to every successor. | |
2294 | SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; | |
2295 | for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) | |
2296 | ++SwitchEdges[SI->getSuccessor(i)]; | |
2297 | ||
2298 | for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); | |
2299 | i != e; ++i) { | |
2300 | BasicBlock *Dst = i.getCaseSuccessor(); | |
2301 | // If there is only a single edge, propagate the case value into it. | |
2302 | if (SwitchEdges.lookup(Dst) == 1) { | |
2303 | BasicBlockEdge E(Parent, Dst); | |
2304 | Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); | |
2305 | } | |
2306 | } | |
2307 | return Changed; | |
2308 | } | |
2309 | ||
2310 | // Instructions with void type don't return a value, so there's | |
2311 | // no point in trying to find redundancies in them. | |
2312 | if (I->getType()->isVoidTy()) return false; | |
2313 | ||
2314 | uint32_t NextNum = VN.getNextUnusedValueNumber(); | |
2315 | unsigned Num = VN.lookup_or_add(I); | |
2316 | ||
2317 | // Allocations are always uniquely numbered, so we can save time and memory | |
2318 | // by fast failing them. | |
2319 | if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { | |
2320 | addToLeaderTable(Num, I, I->getParent()); | |
2321 | return false; | |
2322 | } | |
2323 | ||
2324 | // If the number we were assigned was a brand new VN, then we don't | |
2325 | // need to do a lookup to see if the number already exists | |
2326 | // somewhere in the domtree: it can't! | |
2327 | if (Num >= NextNum) { | |
2328 | addToLeaderTable(Num, I, I->getParent()); | |
2329 | return false; | |
2330 | } | |
2331 | ||
2332 | // Perform fast-path value-number based elimination of values inherited from | |
2333 | // dominators. | |
2334 | Value *repl = findLeader(I->getParent(), Num); | |
1a4d82fc | 2335 | if (!repl) { |
223e47cc LB |
2336 | // Failure, just remember this instance for future use. |
2337 | addToLeaderTable(Num, I, I->getParent()); | |
2338 | return false; | |
2339 | } | |
2340 | ||
2341 | // Remove it! | |
970d7e83 LB |
2342 | patchAndReplaceAllUsesWith(I, repl); |
2343 | if (MD && repl->getType()->getScalarType()->isPointerTy()) | |
223e47cc LB |
2344 | MD->invalidateCachedPointerInfo(repl); |
2345 | markInstructionForDeletion(I); | |
2346 | return true; | |
2347 | } | |
2348 | ||
2349 | /// runOnFunction - This is the main transformation entry point for a function. | |
2350 | bool GVN::runOnFunction(Function& F) { | |
1a4d82fc JJ |
2351 | if (skipOptnoneFunction(F)) |
2352 | return false; | |
2353 | ||
223e47cc LB |
2354 | if (!NoLoads) |
2355 | MD = &getAnalysis<MemoryDependenceAnalysis>(); | |
1a4d82fc JJ |
2356 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
2357 | DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); | |
2358 | DL = DLP ? &DLP->getDataLayout() : nullptr; | |
85aaf69f | 2359 | AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
223e47cc LB |
2360 | TLI = &getAnalysis<TargetLibraryInfo>(); |
2361 | VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); | |
2362 | VN.setMemDep(MD); | |
2363 | VN.setDomTree(DT); | |
2364 | ||
2365 | bool Changed = false; | |
2366 | bool ShouldContinue = true; | |
2367 | ||
2368 | // Merge unconditional branches, allowing PRE to catch more | |
2369 | // optimization opportunities. | |
2370 | for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { | |
2371 | BasicBlock *BB = FI++; | |
2372 | ||
2373 | bool removedBlock = MergeBlockIntoPredecessor(BB, this); | |
2374 | if (removedBlock) ++NumGVNBlocks; | |
2375 | ||
2376 | Changed |= removedBlock; | |
2377 | } | |
2378 | ||
2379 | unsigned Iteration = 0; | |
2380 | while (ShouldContinue) { | |
2381 | DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); | |
2382 | ShouldContinue = iterateOnFunction(F); | |
223e47cc LB |
2383 | Changed |= ShouldContinue; |
2384 | ++Iteration; | |
2385 | } | |
2386 | ||
2387 | if (EnablePRE) { | |
1a4d82fc JJ |
2388 | // Fabricate value numbers for dead code in order to suppress an | |
2389 | // assertion in performPRE(). | |
2390 | assignValNumForDeadCode(); | |
223e47cc LB |
2391 | bool PREChanged = true; |
2392 | while (PREChanged) { | |
2393 | PREChanged = performPRE(F); | |
2394 | Changed |= PREChanged; | |
2395 | } | |
2396 | } | |
1a4d82fc | 2397 | |
223e47cc LB |
2398 | // FIXME: Should perform GVN again after PRE does something. PRE can move |
2399 | // computations into blocks where they become fully redundant. Note that | |
2400 | // we can't do this until PRE's critical edge splitting updates memdep. | |
2401 | // Actually, when this happens, we should just fully integrate PRE into GVN. | |
2402 | ||
2403 | cleanupGlobalSets(); | |
1a4d82fc JJ |
2404 | // Do not clean up DeadBlocks in cleanupGlobalSets(), as it's called for | |
2405 | // each iteration. | |
2406 | DeadBlocks.clear(); | |
223e47cc LB |
2407 | |
2408 | return Changed; | |
2409 | } | |
2410 | ||
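// --- Editor's illustrative sketch (not part of the pass) -------------------
// runOnFunction above is a fixpoint driver: iterate over the whole function
// until an iteration changes nothing, then run PRE to its own fixpoint. The
// bare skeleton, with hypothetical callables standing in for
// iterateOnFunction() and performPRE():
template <typename IterFn, typename PreFn>
bool runToFixpoint(IterFn iterateOnce, PreFn performPREOnce) {
  bool Changed = false;
  while (iterateOnce())    // GVN proper, repeated until stable.
    Changed = true;
  while (performPREOnce()) // Then PRE, also repeated until stable.
    Changed = true;
  return Changed;
}
// ----------------------------------------------------------------------------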
2411 | ||
2412 | bool GVN::processBlock(BasicBlock *BB) { | |
2413 | // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function | |
2414 | // (and incrementing BI before processing an instruction). | |
2415 | assert(InstrsToErase.empty() && | |
2416 | "We expect InstrsToErase to be empty across iterations"); | |
1a4d82fc JJ |
2417 | if (DeadBlocks.count(BB)) |
2418 | return false; | |
2419 | ||
223e47cc LB |
2420 | bool ChangedFunction = false; |
2421 | ||
2422 | for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); | |
2423 | BI != BE;) { | |
2424 | ChangedFunction |= processInstruction(BI); | |
2425 | if (InstrsToErase.empty()) { | |
2426 | ++BI; | |
2427 | continue; | |
2428 | } | |
2429 | ||
2430 | // If we need some instructions deleted, do it now. | |
2431 | NumGVNInstr += InstrsToErase.size(); | |
2432 | ||
2433 | // Avoid iterator invalidation. | |
2434 | bool AtStart = BI == BB->begin(); | |
2435 | if (!AtStart) | |
2436 | --BI; | |
2437 | ||
1a4d82fc | 2438 | for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), |
223e47cc LB |
2439 | E = InstrsToErase.end(); I != E; ++I) { |
2440 | DEBUG(dbgs() << "GVN removed: " << **I << '\n'); | |
2441 | if (MD) MD->removeInstruction(*I); | |
223e47cc | 2442 | DEBUG(verifyRemoved(*I)); |
970d7e83 | 2443 | (*I)->eraseFromParent(); |
223e47cc LB |
2444 | } |
2445 | InstrsToErase.clear(); | |
2446 | ||
2447 | if (AtStart) | |
2448 | BI = BB->begin(); | |
2449 | else | |
2450 | ++BI; | |
2451 | } | |
2452 | ||
2453 | return ChangedFunction; | |
2454 | } | |
2455 | ||
85aaf69f | 2456 | bool GVN::performScalarPRE(Instruction *CurInst) { |
970d7e83 | 2457 | SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; |
223e47cc | 2458 | |
85aaf69f SL |
2459 | if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) || |
2460 | isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || | |
2461 | CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || | |
2462 | isa<DbgInfoIntrinsic>(CurInst)) | |
2463 | return false; | |
223e47cc | 2464 | |
85aaf69f SL |
2465 | // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from |
2466 | // sinking the compare again, and it would force the code generator to | |
2467 | // move the i1 from processor flags or predicate registers into a general | |
2468 | // purpose register. | |
2469 | if (isa<CmpInst>(CurInst)) | |
2470 | return false; | |
223e47cc | 2471 | |
85aaf69f SL |
2472 | // We don't currently value number ANY inline asm calls. |
2473 | if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) | |
2474 | if (CallI->isInlineAsm()) | |
2475 | return false; | |
223e47cc | 2476 | |
85aaf69f SL |
2477 | uint32_t ValNo = VN.lookup(CurInst); |
2478 | ||
2479 | // Look for the predecessors for PRE opportunities. We're | |
2480 | // only trying to solve the basic diamond case, where | |
2481 | // a value is computed in the successor and one predecessor, | |
2482 | // but not the other. We also explicitly disallow cases | |
2483 | // where the successor is its own predecessor, because they're | |
2484 | // more complicated to get right. | |
2485 | unsigned NumWith = 0; | |
2486 | unsigned NumWithout = 0; | |
2487 | BasicBlock *PREPred = nullptr; | |
2488 | BasicBlock *CurrentBlock = CurInst->getParent(); | |
2489 | predMap.clear(); | |
2490 | ||
2491 | for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); | |
2492 | PI != PE; ++PI) { | |
2493 | BasicBlock *P = *PI; | |
2494 | // We're not interested in PRE where the block is its | |
2495 | // own predecessor, or in blocks with predecessors | |
2496 | // that are not reachable. | |
2497 | if (P == CurrentBlock) { | |
2498 | NumWithout = 2; | |
2499 | break; | |
2500 | } else if (!DT->isReachableFromEntry(P)) { | |
2501 | NumWithout = 2; | |
2502 | break; | |
2503 | } | |
223e47cc | 2504 | |
85aaf69f SL |
2505 | Value *predV = findLeader(P, ValNo); |
2506 | if (!predV) { | |
2507 | predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); | |
2508 | PREPred = P; | |
2509 | ++NumWithout; | |
2510 | } else if (predV == CurInst) { | |
2511 | /* CurInst dominates this predecessor. */ | |
2512 | NumWithout = 2; | |
2513 | break; | |
2514 | } else { | |
2515 | predMap.push_back(std::make_pair(predV, P)); | |
2516 | ++NumWith; | |
2517 | } | |
2518 | } | |
223e47cc | 2519 | |
85aaf69f SL |
2520 | // Don't do PRE when it might increase code size, i.e. when |
2521 | // we would need to insert instructions in more than one pred. | |
2522 | if (NumWithout != 1 || NumWith == 0) | |
2523 | return false; | |
223e47cc | 2524 | |
85aaf69f SL |
2525 | // Don't do PRE across indirect branch. |
2526 | if (isa<IndirectBrInst>(PREPred->getTerminator())) | |
2527 | return false; | |
223e47cc | 2528 | |
85aaf69f SL |
2529 | // We can't do PRE safely on a critical edge, so instead we schedule |
2530 | // the edge to be split and perform the PRE the next time we iterate | |
2531 | // on the function. | |
2532 | unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); | |
2533 | if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { | |
2534 | toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); | |
2535 | return false; | |
2536 | } | |

  // Instantiate the expression in the predecessor that lacked it.
  // Because we are going top-down through the block, all value numbers
  // will be available in the predecessor by the time we need them. Any
  // that weren't originally present will have been instantiated earlier
  // in this loop.
  Instruction *PREInstr = CurInst->clone();
  bool success = true;
  for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
    Value *Op = PREInstr->getOperand(i);
    if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
      continue;

    if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
      PREInstr->setOperand(i, V);
    } else {
      success = false;
      break;
    }
  }

  // Fail out if we encounter an operand that is not available in
  // the PRE predecessor. This is typically because of loads which
  // are not value numbered precisely.
  if (!success) {
    DEBUG(verifyRemoved(PREInstr));
    delete PREInstr;
    return false;
  }

  PREInstr->insertBefore(PREPred->getTerminator());
  PREInstr->setName(CurInst->getName() + ".pre");
  PREInstr->setDebugLoc(CurInst->getDebugLoc());
  VN.add(PREInstr, ValNo);
  ++NumGVNPRE;

  // Update the availability map to include the new instruction.
  addToLeaderTable(ValNo, PREInstr, PREPred);

  // Create a PHI to make the value available in this block.
  PHINode *Phi =
      PHINode::Create(CurInst->getType(), predMap.size(),
                      CurInst->getName() + ".pre-phi", CurrentBlock->begin());
  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
    if (Value *V = predMap[i].first)
      Phi->addIncoming(V, predMap[i].second);
    else
      Phi->addIncoming(PREInstr, PREPred);
  }
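  // For the diamond sketched earlier this yields, roughly (illustrative
  // names only):
  //   %v.pre = add i32 %a, %b                       ; inserted into BB2
  //   %v.pre-phi = phi i32 [ %v, %BB1 ], [ %v.pre, %BB2 ]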

  VN.add(Phi, ValNo);
  addToLeaderTable(ValNo, Phi, CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(Phi);
  if (Phi->getType()->getScalarType()->isPointerTy()) {
    // Because we have added a PHI-use of the pointer value, it has now
    // "escaped" from alias analysis' perspective. We need to inform
    // AA of this.
    for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
      unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
      VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
    }

    if (MD)
      MD->invalidateCachedPointerInfo(Phi);
  }
  VN.erase(CurInst);
  removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

  DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  if (MD)
    MD->removeInstruction(CurInst);
  DEBUG(verifyRemoved(CurInst));
  CurInst->eraseFromParent();
  return true;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on a landing pad.
    if (CurrentBlock->isLandingPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = BI++;
      // Accumulate with |= rather than plain assignment, so a success
      // earlier in the block is not forgotten by a later failure.
      Changed |= performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
  if (MD)
    MD->invalidateCachedPredecessors();
  return BB;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst *, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD)
    MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
  // Save the blocks the function has before the transformation begins. GVN
  // may split critical edges, and hence may invalidate the RPO/DT iterators.
  std::vector<BasicBlock *> BBVect;
  BBVect.reserve(256);
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
                                                           RE = RPOT.end();
       RI != RE; ++RI)
    BBVect.push_back(*RI);

  for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
       I != E; I++)
    Changed |= processBlock(*I);

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
           I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
    const LeaderTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}

// BB is declared dead, which implies that other blocks become dead as well.
// This function adds all such blocks to "DeadBlocks". For the dead blocks'
// live successors, update their phi nodes by replacing the operands
// corresponding to dead blocks with UndefVal.
//
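// The computation is a simple worklist fixed point: a block newly marked
// dead also kills every block it dominates, and a live block whose
// predecessors have all become dead joins the worklist itself.
//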
void GVN::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert(Dom.begin(), Dom.end());

    // Figure out the dominance-frontier(D).
    for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
             E = Dom.end(); I != E; I++) {
      BasicBlock *B = *I;
      for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
        BasicBlock *S = *SI;
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
          if (!DeadBlocks.count(*PI)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could still be proved dead later on. That is why we don't
          // update phi operands at this moment.
          DF.insert(S);
        } else {
          // Although S is not dominated by D, it is dead by now. This can
          // happen when S already had a dead predecessor before D was
          // declared dead.
          NewDead.push_back(S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with UndefVal.
  for (SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
       I != E; I++) {
    BasicBlock *B = *I;
    if (DeadBlocks.count(B))
      continue;

    SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
    for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
             PE = Preds.end(); PI != PE; PI++) {
      BasicBlock *P = *PI;

      if (!DeadBlocks.count(P))
        continue;

      if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }

      for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
        PHINode &Phi = cast<PHINode>(*II);
        Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
                             UndefValue::get(Phi.getType()));
      }
    }
  }
}

// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), this function performs the following
// analyses and transformations:
//  1) If the dead outgoing edge is a critical edge, split it. Let R be the
//     target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step is {X | X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor. The
//     result of this step is dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with "UndefVal", in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
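//
// For example (illustrative): once the condition has been folded to a
// constant, as in
//   br i1 true, label %live, label %dead
// the untaken successor %dead becomes the dead root R, and every block
// dominated by %dead is marked dead along with it.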
bool GVN::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot = Cond->getZExtValue() ?
                         BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}

// performPRE() will trigger an assert if it comes across an instruction
// without an associated value number. As a function normally has far more
// live instructions than dead ones, it is cheaper simply to "fabricate" a
// value number for the dead code than to check whether each instruction
// involved is dead.
void GVN::assignValNumForDeadCode() {
  for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
           E = DeadBlocks.end(); I != E; I++) {
    BasicBlock *BB = *I;
    for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
         II != EE; II++) {
      Instruction *Inst = &*II;
      unsigned ValNum = VN.lookup_or_add(Inst);
      addToLeaderTable(ValNum, Inst, BB);
    }
  }
}