//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "memdep"

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");

STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr,
          "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");

// Limit for the number of instructions to scan in a block.
static const unsigned int BlockScanLimit = 100;

// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;

char MemoryDependenceAnalysis::ID = 0;

// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                      "Memory Dependence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                    "Memory Dependence Analysis", false, true)

MemoryDependenceAnalysis::MemoryDependenceAnalysis()
    : FunctionPass(ID), PredCache() {
  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}

/// Clean up memory in between runs
void MemoryDependenceAnalysis::releaseMemory() {
  LocalDeps.clear();
  NonLocalDeps.clear();
  NonLocalPointerDeps.clear();
  ReverseLocalDeps.clear();
  ReverseNonLocalDeps.clear();
  ReverseNonLocalPtrDeps.clear();
  PredCache->clear();
}

/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequiredTransitive<AliasAnalysis>();
}

bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
  AA = &getAnalysis<AliasAnalysis>();
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;
  DominatorTreeWrapperPass *DTWP =
      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DT = DTWP ? &DTWP->getDomTree() : nullptr;
  if (!PredCache)
    PredCache.reset(new PredIteratorCache());
  return false;
}

/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void RemoveFromReverseMap(DenseMap<Instruction*,
                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
                                 Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
  InstIt = ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!"); (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}

/// GetLocation - If the given instruction references a specific memory
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
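/// For example (restating the cases handled below): an unordered load fills
/// in Loc with the loaded address and returns Ref; a simple store fills in
/// Loc and returns Mod; a call to free() returns Mod over the freed pointer;
/// and an opaque instruction that may write leaves Loc.Ptr null and returns
/// ModRef.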
static
AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
                                        AliasAnalysis::Location &Loc,
                                        AliasAnalysis *AA) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::Ref;
    }
    if (LI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::Mod;
    }
    if (SI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = AA->getLocation(V);
    return AliasAnalysis::ModRef;
  }

  if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
    // calls to free() deallocate the entire structure
    Loc = AliasAnalysis::Location(CI->getArgOperand(0));
    return AliasAnalysis::Mod;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    AAMDNodes AAInfo;

    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      II->getAAMetadata(AAInfo);
      Loc = AliasAnalysis::Location(II->getArgOperand(1),
                                    cast<ConstantInt>(II->getArgOperand(0))
                                      ->getZExtValue(), AAInfo);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    case Intrinsic::invariant_end:
      II->getAAMetadata(AAInfo);
      Loc = AliasAnalysis::Location(II->getArgOperand(2),
                                    cast<ConstantInt>(II->getArgOperand(1))
                                      ->getZExtValue(), AAInfo);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    default:
      break;
    }
  }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return AliasAnalysis::ModRef;
  if (Inst->mayReadFromMemory())
    return AliasAnalysis::Ref;
  return AliasAnalysis::NoModRef;
}

/// getCallSiteDependencyFrom - Private helper for finding the local
/// dependencies of a call site.
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
  unsigned Limit = BlockScanLimit;

  // Walk backwards through the block, looking for dependencies
  while (ScanIt != BB->begin()) {
    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    Instruction *Inst = --ScanIt;

    // If this inst is a memory op, get the pointer it accessed
    AliasAnalysis::Location Loc;
    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
    if (Loc.Ptr) {
      // A simple instruction.
      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (CallSite InstCS = cast<Value>(Inst)) {
      // Debug intrinsics don't cause dependences.
      if (isa<DbgInfoIntrinsic>(Inst)) continue;
      // If these two calls do not interfere, look past it.
      switch (AA->getModRefInfo(CS, InstCS)) {
      case AliasAnalysis::NoModRef:
        // If the two calls are the same, return InstCS as a Def, so that
        // CS can be found redundant and eliminated.
        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
        // keep scanning.
        continue;
      default:
        return MemDepResult::getClobber(Inst);
      }
    }

    // If we could not obtain a pointer for the instruction and the instruction
    // touches memory then assume that this is a dependency.
    if (MR != AliasAnalysis::NoModRef)
      return MemDepResult::getClobber(Inst);
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
/// would fully overlap MemLoc if done as a wider legal integer load.
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                       const Value *&MemLocBase,
                                       int64_t &MemLocOffs,
                                       const LoadInst *LI,
                                       const DataLayout *DL) {
  // If we have no target data, we can't do this.
  if (!DL) return false;

  // If we haven't already computed the base/offset of MemLoc, do so now.
  if (!MemLocBase)
    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
                                    LI, *DL);
  return Size != 0;
}

/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, Offs,
/// and Size) and compares it against a load. If the specified load could
/// be safely widened to a larger integer load that is 1) still efficient,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use. If not, this returns zero.
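///
/// For intuition, a worked example (illustrative numbers, not from the
/// original source): given an i8 load of %P known 4-byte aligned
/// (LIOffs == 0) and a 1-byte query location at %P+3 (MemLocEnd == 4), the
/// candidate width starts at NextPowerOf2(1) == 2 bytes, which does not
/// reach MemLocEnd, so it doubles to 4; a 4-byte load is within both the
/// alignment and the legal-integer limits, so 4 is returned.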
unsigned MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                unsigned MemLocSize, const LoadInst *LI,
                                const DataLayout &DL) {
  // We can only extend simple integer loads.
  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;

  // Load widening is hostile to ThreadSanitizer: it may cause false positives
  // or make the reports more cryptic (access sizes are wrong).
  if (LI->getParent()->getParent()->getAttributes().
      hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
    return 0;

  // Get the base of this load.
  int64_t LIOffs = 0;
  const Value *LIBase =
    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);

  // If the two pointers are not based on the same pointer, we can't tell that
  // they are related.
  if (LIBase != MemLocBase) return 0;

  // Okay, the two values are based on the same pointer, but returned as
  // no-alias. This happens when we have things like two byte loads at "P+1"
  // and "P+3". Check to see if increasing the size of the "LI" load up to its
  // alignment (or the largest native integer type) will allow us to load all
  // the bits required by MemLoc.

  // If MemLoc is before LI, then no widening of LI will help us out.
  if (MemLocOffs < LIOffs) return 0;

  // Get the alignment of the load in bytes. We assume that it is safe to load
  // any legal integer up to this size without a problem. For example, if we're
  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
  // to i16.
  unsigned LoadAlign = LI->getAlignment();

  int64_t MemLocEnd = MemLocOffs+MemLocSize;

  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
  if (LIOffs+LoadAlign < MemLocEnd) return 0;

  // This is the size of the load to try. Start with the next larger power of
  // two.
  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);

  while (1) {
    // If this load size is bigger than our known alignment or would not fit
    // into a native integer register, then we fail.
    if (NewLoadByteSize > LoadAlign ||
        !DL.fitsInLegalInteger(NewLoadByteSize*8))
      return 0;

    if (LIOffs+NewLoadByteSize > MemLocEnd &&
        LI->getParent()->getParent()->getAttributes().
          hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
      // We will be reading past the location accessed by the original program.
      // While this is safe in a regular build, Address Safety analysis tools
      // may start reporting false warnings. So, don't do widening.
      return 0;

    // If a load of this width would include all of MemLoc, then we succeed.
    if (LIOffs+NewLoadByteSize >= MemLocEnd)
      return NewLoadByteSize;

    NewLoadByteSize <<= 1;
  }
}

/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
/// with reads from read-only locations. If possible, pass the query
/// instruction as well; this function may take advantage of the metadata
/// annotated to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
                         BasicBlock::iterator ScanIt, BasicBlock *BB,
                         Instruction *QueryInst) {

  const Value *MemLocBase = nullptr;
  int64_t MemLocOffset = 0;
  unsigned Limit = BlockScanLimit;
  bool isInvariantLoad = false;

  // We must be careful with atomic accesses, as they may allow another thread
  //   to touch this location, clobbering it. We are conservative: if the
  //   QueryInst is not a simple (non-atomic) memory access, we automatically
  //   return getClobber.
  // If it is simple, we know based on the results of
  // "Compiler testing via a theory of sound optimisations in the C11/C++11
  //   memory model" in PLDI 2013, that a non-atomic location can only be
  //   clobbered between a pair of a release and an acquire action, with no
  //   access to the location in between.
  // Here is an example for giving the general intuition behind this rule.
  // In the following code:
  //   store x 0;
  //   release action; [1]
  //   acquire action; [4]
  //   %val = load x;
  // It is unsafe to replace %val by 0 because another thread may be running:
  //   acquire action; [2]
  //   store x 42;
  //   release action; [3]
  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  //   being 42. A key property of this program however is that if either
  //   1 or 4 were missing, there would be a race between the store of 42
  //   and either the store of 0 or the load (making the whole program racy).
  // The paper mentioned above shows that the same property is respected
  //   by every program that can detect any optimisation of that kind: either
  //   it is racy (undefined) or there is a release followed by an acquire
  //   between the pair of accesses under consideration.
  bool HasSeenAcquire = false;

  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
      isInvariantLoad = true;
  }

  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = --ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II)) continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them. It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
                            MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
    }

    // Values depend on loads if the pointers are must aliased. This means
    // that a load depends on another must aliased load from the same value.
    // One exception is atomic loads: a value can depend on an atomic load
    // that it does not alias with when this atomic load indicates that
    // another thread may be accessing the location.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // Atomic loads have complications involved.
      // A Monotonic (or higher) load is OK if the query inst is itself not
      // atomic.
      // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
      // release store will know to return getClobber.
      // FIXME: This is overly conservative.
      if (!LI->isUnordered()) {
        if (!QueryInst)
          return MemDepResult::getClobber(LI);
        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
          if (!QueryLI->isSimple())
            return MemDepResult::getClobber(LI);
        } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
          if (!QuerySI->isSimple())
            return MemDepResult::getClobber(LI);
        } else if (QueryInst->mayReadOrWriteMemory()) {
          return MemDepResult::getClobber(LI);
        }

        if (isAtLeastAcquire(LI->getOrdering()))
          HasSeenAcquire = true;
      }

      // FIXME: this is overly conservative.
      // While volatile access cannot be eliminated, they do not have to
      // clobber non-aliasing locations, as normal accesses can for example
      // be reordered with volatile accesses.
      if (LI->isVolatile())
        return MemDepResult::getClobber(LI);

      AliasAnalysis::Location LoadLoc = AA->getLocation(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);

      if (isLoad) {
        if (R == AliasAnalysis::NoAlias) {
          // If this is an over-aligned integer load (for example,
          // "load i8* %P, align 4") see if it would obviously overlap with the
          // queried location if widened to a larger load (e.g. if the queried
          // location is 1 byte at P+1). If so, return it as a load/load
          // clobber result, allowing the client to decide to widen the load if
          // it wants to.
          if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
            if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
                isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
                                                       MemLocOffset, LI, DL))
              return MemDepResult::getClobber(Inst);

          continue;
        }

        // Must aliased loads are defs of each other.
        if (R == AliasAnalysis::MustAlias)
          return MemDepResult::getDef(Inst);

#if 0   // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
        // in terms of clobbering loads, but since it does this by looking
        // at the clobbering load directly, it doesn't know about any
        // phi translation that may have happened along the way.

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
        if (R == AliasAnalysis::PartialAlias)
          return MemDepResult::getClobber(Inst);
#endif

        // Random may-alias loads don't depend on each other without a
        // dependence.
        continue;
      }

      // Stores don't depend on other no-aliased accesses.
      if (R == AliasAnalysis::NoAlias)
        continue;

      // Stores don't alias loads from read-only memory.
      if (AA->pointsToConstantMemory(LoadLoc))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // A Monotonic store is OK if the query inst is itself not atomic.
      // A Release (or higher) store further requires that no acquire load
      // has been seen.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered()) {
        if (!QueryInst)
          return MemDepResult::getClobber(SI);
        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
          if (!QueryLI->isSimple())
            return MemDepResult::getClobber(SI);
        } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
          if (!QuerySI->isSimple())
            return MemDepResult::getClobber(SI);
        } else if (QueryInst->mayReadOrWriteMemory()) {
          return MemDepResult::getClobber(SI);
        }

        if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
          return MemDepResult::getClobber(SI);
      }

      // FIXME: this is overly conservative.
      // While volatile access cannot be eliminated, they do not have to
      // clobber non-aliasing locations, as normal accesses can for example
      // be reordered with volatile accesses.
      if (SI->isVolatile())
        return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it. Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
        continue;

      // Ok, this store might clobber the query pointer. Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      AliasAnalysis::Location StoreLoc = AA->getLocation(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);

      if (R == AliasAnalysis::NoAlias)
        continue;
      if (R == AliasAnalysis::MustAlias)
        return MemDepResult::getDef(Inst);
      if (isInvariantLoad)
        continue;
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def. This means that there is no dependence and
    // the access can be optimized based on that. For example, a load could
    // turn into undef.
    // Note: Only determine this to be a malloc if Inst is the malloc call, not
    // a subsequent bitcast of the malloc call result. There can be stores to
    // the malloced memory between the malloc call and its bitcast uses, and we
    // need to continue scanning until the malloc call.
    const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
    if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);

      if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
      // Be conservative if the accessed pointer may alias the allocation.
      if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
        return MemDepResult::getClobber(Inst);
      // If the allocation is not aliased and does not read memory (like
      // strdup), it is safe to ignore.
      if (isa<AllocaInst>(Inst) ||
          isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
        continue;
    }

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
    // If necessary, perform additional analysis.
    if (MR == AliasAnalysis::ModRef)
      MR = AA->callCapturesBefore(Inst, MemLoc, DT);
    switch (MR) {
    case AliasAnalysis::NoModRef:
      // If the call has no effect on the queried pointer, just ignore it.
      continue;
    case AliasAnalysis::Mod:
      return MemDepResult::getClobber(Inst);
    case AliasAnalysis::Ref:
      // If the call is known to never store to the pointer, and if this is a
      // load query, we can safely ignore it (scan past it).
      if (isLoad)
        continue;
    default:
      // Otherwise, there is a potential dependence. Return a clobber.
      return MemDepResult::getClobber(Inst);
    }
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// getDependency - Return the instruction on which a memory operation
/// depends.
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
  Instruction *ScanPos = QueryInst;

  // Check for a cached result
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it. Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;

    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found. If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    AliasAnalysis::Location MemLoc;
    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !(MR & AliasAnalysis::Mod);
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
                                            QueryParent, QueryInst);
    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
      CallSite QueryCS(QueryInst);
      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
                                             QueryParent);
    } else
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}
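
// Illustrative usage sketch (hypothetical client code, not part of this
// file): a pass that has required MemoryDependenceAnalysis might query a
// load 'LI' like so:
//
//   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
//   MemDepResult Dep = MD.getDependency(LI);
//   if (Dep.isDef())            // e.g. a must-aliased store or load
//     /* forward the known value */;
//   else if (Dep.isClobber())   // something may overwrite the location
//     /* be conservative */;
//   else if (Dep.isNonLocal())  // the answer lies in predecessor blocks
//     /* fall back to the non-local query interface */;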

#ifndef NDEBUG
/// AssertSorted - This method is used when -debug is specified to verify that
/// cache arrays are properly kept sorted.
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1) Count = Cache.size();
  if (Count == 0) return;

  for (unsigned i = 1; i != unsigned(Count); ++i)
    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
}
#endif

/// getNonLocalCallDependency - Perform a full dependency query for the
/// specified call, returning the set of blocks that the value is
/// potentially live across. The returned set of results will include a
/// "NonLocal" result for all blocks where the value is live across.
///
/// This method assumes the instruction returns a "NonLocal" dependency
/// within its own block.
///
/// This returns a reference to an internal data structure that may be
/// invalidated on the next non-local query or when an instruction is
/// removed. Clients must copy this data if they want it around longer than
/// that.
const MemoryDependenceAnalysis::NonLocalDepInfo &
MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
         "getNonLocalCallDependency should only be used on calls with "
         "non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  NonLocalDepInfo &Cache = CacheP.first;

  /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
  /// the cached case, this can happen due to instructions being deleted etc.
  /// In the uncached case, this starts out as the set of predecessors we
  /// care about.
  SmallVector<BasicBlock*, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry. If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
         I != E; ++I)
      if (I->getResult().isDirty())
        DirtyBlocks.push_back(I->getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    std::sort(Cache.begin(), Cache.end());

    ++NumCacheDirtyNonLocal;
    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    //     << Cache.size() << " cached: " << *QueryInst;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
      DirtyBlocks.push_back(*PI);
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);

  SmallPtrSet<BasicBlock*, 64> Visited;

  unsigned NumSortedEntries = Cache.size();
  DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.back();
    DirtyBlocks.pop_back();

    // Already processed this block?
    if (!Visited.insert(DirtyBB).second)
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set. If so, find it.
    DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                       NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = nullptr;
    if (Entry != Cache.begin()+NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't
    // have to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
        ScanPos = Inst;
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
                             QueryCS.getInstruction());
      }
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos,
                                      DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found. If this is not the entry block of the function,
      // the result is non-local; in the entry block it is non-func-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it. Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    } else {

      // If the block *is* completely transparent to the load, we need to check
      // the predecessors of this block. Add them to our worklist.
      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
        DirtyBlocks.push_back(*PI);
    }
  }

  return Cache;
}

/// getNonLocalPointerDependency - Perform a full dependency query for an
/// access to the specified (non-volatile) memory location, returning the
/// set of instructions that either define or clobber the value.
///
/// This method assumes the pointer has a "NonLocal" dependency within its
/// own block.
///
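/// An illustrative call site (hypothetical): once getDependency(LI) reports
/// a NonLocal result for a load, a client typically does
///   SmallVector<NonLocalDepResult, 16> Deps;
///   MD.getNonLocalPointerDependency(LI, Deps);
/// and then inspects each entry's getBB() and getResult().
///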
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
                             SmallVectorImpl<NonLocalDepResult> &Result) {

  auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
    if (auto *I = dyn_cast<LoadInst>(Inst))
      return AA->getLocation(I);
    else if (auto *I = dyn_cast<StoreInst>(Inst))
      return AA->getLocation(I);
    else if (auto *I = dyn_cast<VAArgInst>(Inst))
      return AA->getLocation(I);
    else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
      return AA->getLocation(I);
    else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
      return AA->getLocation(I);
    else
      llvm_unreachable("unsupported memory instruction");
  };

  const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  assert(FromBB);

  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();

  // This routine does not expect to deal with volatile instructions.
  // Doing so would require piping through the QueryInst all the way through.
  // TODO: volatiles can't be elided, but they can be reordered with other
  // non-volatile accesses.
  auto isVolatile = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return LI->isVolatile();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return SI->isVolatile();
    }
    return false;
  };
  // We currently give up on any instruction which is ordered, but we do handle
  // atomic instructions which are unordered.
  // TODO: Handle ordered instructions
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };
  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
    Result.push_back(NonLocalDepResult(FromBB,
                                       MemDepResult::getUnknown(),
                                       const_cast<Value *>(Loc.Ptr)));
    return;
  }

  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block. Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers. This can happen during PHI
  // translation.
  DenseMap<BasicBlock*, Value*> Visited;
  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
                                   Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB,
                                     MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}

/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
/// Pointer/PointeeSize using either cached information in Cache or by doing a
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                        bool isLoad, BasicBlock *BB,
                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {

  // Do a binary search to see if we already have an entry for this block in
  // the cache set. If so, find it.
  NonLocalDepInfo::iterator Entry =
    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
                     NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value. If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);

  // If we had a dirty entry for the block, update it. Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isDef() && !Dep.isClobber())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}

/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
    Cache.insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache.begin(), Cache.end());
    break;
  }
}

/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
/// 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true). In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                            const AliasAnalysis::Location &Loc,
                            bool isLoad, BasicBlock *StartBB,
                            SmallVectorImpl<NonLocalDepResult> &Result,
                            DenseMap<BasicBlock*, Value*> &Visited,
                            bool SkipFirstBlock) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value. Otherwise,
  // it'll be ignored, and we'll have to check to see if the cached size and
  // aa tags are consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.AATags = Loc.AATags;

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already have one.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;

  // If we already have a cache entry for this CacheKey, we may need to do some
  // work to reconcile the cache entry and the current query.
  if (!Pair.second) {
    if (CacheInfo->Size < Loc.Size) {
      // The query's Size is greater than the cached one. Throw out the
      // cached data and proceed with the query at the greater size.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      CacheInfo->Size = Loc.Size;
      for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
           DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
        if (Instruction *Inst = DI->getResult().getInst())
          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
      CacheInfo->NonLocalDeps.clear();
    } else if (CacheInfo->Size > Loc.Size) {
      // This query's Size is less than the cached one. Conservatively restart
      // the query using the greater size.
      return getNonLocalPointerDepFromBB(Pointer,
                                         Loc.getWithNewSize(CacheInfo->Size),
                                         isLoad, StartBB, Result, Visited,
                                         SkipFirstBlock);
    }

    // If the query's AATags are inconsistent with the cached one,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->AATags != Loc.AATags) {
      if (CacheInfo->AATags) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->AATags = AAMDNodes();
        for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
             DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
          if (Instruction *Inst = DI->getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
      }
      if (Loc.AATags)
        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
                                           isLoad, StartBB, Result, Visited,
                                           SkipFirstBlock);
    }
  }
1119 | ||
1120 | NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; | |
1121 | ||
1122 | // If we have valid cached information for exactly the block we are | |
1123 | // investigating, just return it with no recomputation. | |
1124 | if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { | |
1125 | // We have a fully cached result for this query then we can just return the | |
1126 | // cached results and populate the visited set. However, we have to verify | |
1127 | // that we don't already have conflicting results for these blocks. Check | |
1128 | // to ensure that if a block in the results set is in the visited set that | |
1129 | // it was for the same pointer query. | |
1130 | if (!Visited.empty()) { | |
1131 | for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); | |
1132 | I != E; ++I) { | |
1133 | DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); | |
1134 | if (VI == Visited.end() || VI->second == Pointer.getAddr()) | |
1135 | continue; | |
1a4d82fc | 1136 | |
223e47cc LB |
1137 | // We have a pointer mismatch in a block. Just return clobber, saying |
1138 | // that something was clobbered in this result. We could also do a | |
1139 | // non-fully cached query, but there is little point in doing this. | |
1140 | return true; | |
1141 | } | |
1142 | } | |
1a4d82fc | 1143 | |
223e47cc LB |
1144 | Value *Addr = Pointer.getAddr(); |
1145 | for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); | |
1146 | I != E; ++I) { | |
1147 | Visited.insert(std::make_pair(I->getBB(), Addr)); | |
1a4d82fc JJ |
1148 | if (I->getResult().isNonLocal()) { |
1149 | continue; | |
1150 | } | |
1151 | ||
1152 | if (!DT) { | |
1153 | Result.push_back(NonLocalDepResult(I->getBB(), | |
1154 | MemDepResult::getUnknown(), | |
1155 | Addr)); | |
1156 | } else if (DT->isReachableFromEntry(I->getBB())) { | |
223e47cc | 1157 | Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); |
1a4d82fc | 1158 | } |
223e47cc LB |
1159 | } |
1160 | ++NumCacheCompleteNonLocalPtr; | |
1161 | return false; | |
1162 | } | |
1a4d82fc | 1163 | |
223e47cc LB |
1164 | // Otherwise, either this is a new block, a block with an invalid cache |
1165 | // pointer or one that we're about to invalidate by putting more info into it | |
1166 | // than its valid cache info. If empty, the result will be valid cache info, | |
1167 | // otherwise it isn't. | |
1168 | if (Cache->empty()) | |
1169 | CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); | |
1170 | else | |
1171 | CacheInfo->Pair = BBSkipFirstBlockPair(); | |
1a4d82fc | 1172 | |
223e47cc LB |
1173 | SmallVector<BasicBlock*, 32> Worklist; |
1174 | Worklist.push_back(StartBB); | |
1a4d82fc | 1175 | |
223e47cc LB |
1176 | // PredList used inside loop. |
1177 | SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; | |
1178 | ||
1179 | // Keep track of the entries that we know are sorted. Previously cached | |
1180 | // entries will all be sorted. The entries we add are sorted only on demand
1181 | // (we don't insert every element into its sorted position). We know that we
1182 | // won't get any reuse from currently inserted values, because we don't | |
1183 | // revisit blocks after we insert info for them. | |
1184 | unsigned NumSortedEntries = Cache->size(); | |
1185 | DEBUG(AssertSorted(*Cache)); | |
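// Put differently: Cache is maintained as a sorted prefix of
// NumSortedEntries elements plus an unsorted tail of newly appended
// entries, and SortNonLocalDepInfoCache merges the tail back into the
// sorted prefix on demand.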
1a4d82fc | 1186 | |
223e47cc LB |
1187 | while (!Worklist.empty()) { |
1188 | BasicBlock *BB = Worklist.pop_back_val(); | |
1a4d82fc | 1189 | |
85aaf69f SL |
1190 | // If we process a large number of blocks, this becomes very expensive and
1191 | // is likely not worth the effort, so bail out.
1192 | if (Result.size() > NumResultsLimit) { | |
1193 | Worklist.clear(); | |
1194 | // Sort it now (if needed) so that recursive invocations of | |
1195 | // getNonLocalPointerDepFromBB and other routines that could reuse the | |
1196 | // cache value will only see properly sorted cache arrays. | |
1197 | if (Cache && NumSortedEntries != Cache->size()) { | |
1198 | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); | |
1199 | } | |
1200 | // Since we bail out, the "Cache" set won't contain all of the | |
1201 | // results for the query. This is ok (we can still use it to accelerate | |
1202 | // specific block queries) but we can't do the fastpath "return all | |
1203 | // results from the set". Clear out the indicator for this. | |
1204 | CacheInfo->Pair = BBSkipFirstBlockPair(); | |
1205 | return true; | |
1206 | } | |
1207 | ||
223e47cc LB |
1208 | // Skip the first block if we have it. |
1209 | if (!SkipFirstBlock) { | |
1210 | // Analyze the dependency of *Pointer in FromBB. See if we have already
1211 | // been here.
1212 | assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); | |
1213 | ||
1214 | // Get the dependency info for Pointer in BB. If we have cached | |
1215 | // information, we will use it, otherwise we compute it. | |
1216 | DEBUG(AssertSorted(*Cache, NumSortedEntries)); | |
1217 | MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, | |
1218 | NumSortedEntries); | |
1a4d82fc | 1219 | |
223e47cc | 1220 | // If we got a Def or Clobber, add this to the list of results. |
1a4d82fc JJ |
1221 | if (!Dep.isNonLocal()) { |
1222 | if (!DT) { | |
1223 | Result.push_back(NonLocalDepResult(BB, | |
1224 | MemDepResult::getUnknown(), | |
1225 | Pointer.getAddr())); | |
1226 | continue; | |
1227 | } else if (DT->isReachableFromEntry(BB)) { | |
1228 | Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); | |
1229 | continue; | |
1230 | } | |
223e47cc LB |
1231 | } |
1232 | } | |
1a4d82fc | 1233 | |
223e47cc LB |
1234 | // If 'Pointer' is an instruction defined in this block, then we need to do |
1235 | // phi translation to change it into a value live in the predecessor block. | |
1236 | // If not, we just add the predecessors to the worklist and scan them with | |
1237 | // the same Pointer. | |
1238 | if (!Pointer.NeedsPHITranslationFromBlock(BB)) { | |
1239 | SkipFirstBlock = false; | |
1240 | SmallVector<BasicBlock*, 16> NewBlocks; | |
1241 | for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { | |
1242 | // Verify that we haven't looked at this block yet. | |
1243 | std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> | |
1244 | InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); | |
1245 | if (InsertRes.second) { | |
1246 | // First time we've looked at *PI. | |
1247 | NewBlocks.push_back(*PI); | |
1248 | continue; | |
1249 | } | |
1a4d82fc | 1250 | |
223e47cc LB |
1251 | // If we have seen this block before, but it was with a different |
1252 | // pointer, then we have a phi translation failure and we have to treat
1253 | // this as a clobber. | |
1254 | if (InsertRes.first->second != Pointer.getAddr()) { | |
1255 | // Make sure to clean up the Visited map before continuing on to | |
1256 | // PredTranslationFailure. | |
1257 | for (unsigned i = 0; i < NewBlocks.size(); i++) | |
1258 | Visited.erase(NewBlocks[i]); | |
1259 | goto PredTranslationFailure; | |
1260 | } | |
1261 | } | |
1262 | Worklist.append(NewBlocks.begin(), NewBlocks.end()); | |
1263 | continue; | |
1264 | } | |
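// A sketch of the case handled above (hypothetical IR): a pointer that is
// not an instruction defined in BB, e.g. a global in
//   %v = load i32* @g
// denotes the same address in every predecessor, so each predecessor is
// queued and scanned with Pointer unchanged.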
1a4d82fc | 1265 | |
223e47cc LB |
1266 | // We do need to do phi translation. If we know ahead of time that we can't
1267 | // phi translate this value, don't even try.
1268 | if (!Pointer.IsPotentiallyPHITranslatable()) | |
1269 | goto PredTranslationFailure; | |
1a4d82fc | 1270 | |
223e47cc LB |
1271 | // We may have added values to the cache list before this PHI translation. |
1272 | // If so, we haven't done anything to ensure that the cache remains sorted. | |
1273 | // Sort it now (if needed) so that recursive invocations of | |
1274 | // getNonLocalPointerDepFromBB and other routines that could reuse the cache | |
1275 | // value will only see properly sorted cache arrays. | |
1276 | if (Cache && NumSortedEntries != Cache->size()) { | |
1277 | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); | |
1278 | NumSortedEntries = Cache->size(); | |
1279 | } | |
1a4d82fc | 1280 | Cache = nullptr; |
223e47cc LB |
1281 | |
1282 | PredList.clear(); | |
1283 | for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { | |
1284 | BasicBlock *Pred = *PI; | |
1285 | PredList.push_back(std::make_pair(Pred, Pointer)); | |
1286 | ||
1287 | // Get the PHI translated pointer in this predecessor. This can fail if | |
1288 | // not translatable, in which case getAddr() returns null.
1289 | PHITransAddr &PredPointer = PredList.back().second; | |
1a4d82fc | 1290 | PredPointer.PHITranslateValue(BB, Pred, nullptr); |
223e47cc LB |
1291 | |
1292 | Value *PredPtrVal = PredPointer.getAddr(); | |
1a4d82fc | 1293 | |
223e47cc LB |
1294 | // Check to see if we have already visited this pred block with another |
1295 | // pointer. If so, we can't do this lookup. This failure can occur | |
1296 | // with PHI translation when a critical edge exists and the PHI node in | |
1297 | // the successor translates to a pointer value different than the | |
1298 | // pointer the block was first analyzed with. | |
1299 | std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> | |
1300 | InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); | |
1301 | ||
1302 | if (!InsertRes.second) { | |
1303 | // We found the pred; take it off the list of preds to visit. | |
1304 | PredList.pop_back(); | |
1305 | ||
1306 | // If the predecessor was visited with PredPtr, then we already did | |
1307 | // the analysis and can ignore it. | |
1308 | if (InsertRes.first->second == PredPtrVal) | |
1309 | continue; | |
1a4d82fc | 1310 | |
223e47cc LB |
1311 | // Otherwise, the block was previously analyzed with a different |
1312 | // pointer. We can't represent the result of this case, so we just | |
1313 | // treat this as a phi translation failure. | |
1314 | ||
1315 | // Make sure to clean up the Visited map before continuing on to | |
1316 | // PredTranslationFailure. | |
1a4d82fc | 1317 | for (unsigned i = 0, n = PredList.size(); i < n; ++i) |
223e47cc LB |
1318 | Visited.erase(PredList[i].first); |
1319 | ||
1320 | goto PredTranslationFailure; | |
1321 | } | |
1322 | } | |
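// Sketch of the conflict described above (hypothetical IR): given
//   %p = phi i32* [ %a, %Pred ], [ %b, %Other ]
// in the successor, PHI translation along the Pred edge rewrites %p to %a;
// if Pred was already visited on behalf of a different address, the Visited
// entry disagrees and we treat it as a phi translation failure below.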
1323 | ||
1324 | // Actually process results here; this needs to be a separate loop to avoid
1325 | // calling getNonLocalPointerDepFromBB for blocks we don't want to return | |
1a4d82fc | 1326 | // any results for. (getNonLocalPointerDepFromBB will modify our |
223e47cc LB |
1327 | // data structures in ways the code after the PredTranslationFailure label
1328 | // doesn't expect.) | |
1a4d82fc | 1329 | for (unsigned i = 0, n = PredList.size(); i < n; ++i) { |
223e47cc LB |
1330 | BasicBlock *Pred = PredList[i].first; |
1331 | PHITransAddr &PredPointer = PredList[i].second; | |
1332 | Value *PredPtrVal = PredPointer.getAddr(); | |
1333 | ||
1334 | bool CanTranslate = true; | |
1335 | // If PHI translation was unable to find an available pointer in this | |
1336 | // predecessor, then we have to assume that the pointer is clobbered in | |
1337 | // that predecessor. We can still do PRE of the load, which would insert | |
1338 | // a computation of the pointer in this predecessor. | |
1a4d82fc | 1339 | if (!PredPtrVal) |
223e47cc LB |
1340 | CanTranslate = false; |
1341 | ||
1342 | // FIXME: it is entirely possible that PHI translating will end up with | |
1343 | // the same value. Consider PHI translating something like: | |
1344 | // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* | |
1345 | // to recurse here, pedantically speaking. | |
1346 | ||
1347 | // If getNonLocalPointerDepFromBB fails here, that means the cached | |
1348 | // result conflicted with the Visited list; we have to conservatively | |
1349 | // assume it is unknown, but this also does not block PRE of the load. | |
1350 | if (!CanTranslate || | |
1351 | getNonLocalPointerDepFromBB(PredPointer, | |
1352 | Loc.getWithNewPtr(PredPtrVal), | |
1353 | isLoad, Pred, | |
1354 | Result, Visited)) { | |
1355 | // Add the entry to the Result list. | |
1356 | NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); | |
1357 | Result.push_back(Entry); | |
1358 | ||
1359 | // Since we had a phi translation failure, the cache for CacheKey won't | |
1360 | // include all of the entries that we need to immediately satisfy future | |
1361 | // queries. Mark this in NonLocalPointerDeps by setting the | |
1362 | // BBSkipFirstBlockPair pointer to null. This means any future reuse of the
1363 | // cached value must do more work, but will not miss the phi trans failure.
1364 | NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; | |
1365 | NLPI.Pair = BBSkipFirstBlockPair(); | |
1366 | continue; | |
1367 | } | |
1368 | } | |
1a4d82fc | 1369 | |
223e47cc LB |
1370 | // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. |
1371 | CacheInfo = &NonLocalPointerDeps[CacheKey]; | |
1372 | Cache = &CacheInfo->NonLocalDeps; | |
1373 | NumSortedEntries = Cache->size(); | |
1a4d82fc | 1374 | |
223e47cc LB |
1375 | // Since we did phi translation, the "Cache" set won't contain all of the |
1376 | // results for the query. This is ok (we can still use it to accelerate | |
1377 | // specific block queries) but we can't do the fastpath "return all | |
1378 | // results from the set" Clear out the indicator for this. | |
1379 | CacheInfo->Pair = BBSkipFirstBlockPair(); | |
1380 | SkipFirstBlock = false; | |
1381 | continue; | |
1382 | ||
1383 | PredTranslationFailure: | |
1384 | // The following code is "failure"; we can't produce a sane translation | |
1385 | // for the given block. It assumes that we haven't modified any of | |
1386 | // our data structures while processing the current block.
1a4d82fc JJ |
1387 | |
1388 | if (!Cache) { | |
223e47cc LB |
1389 | // Refresh the CacheInfo/Cache pointer if it got invalidated. |
1390 | CacheInfo = &NonLocalPointerDeps[CacheKey]; | |
1391 | Cache = &CacheInfo->NonLocalDeps; | |
1392 | NumSortedEntries = Cache->size(); | |
1393 | } | |
1a4d82fc | 1394 | |
223e47cc LB |
1395 | // Since we failed phi translation, the "Cache" set won't contain all of the |
1396 | // results for the query. This is ok (we can still use it to accelerate | |
1397 | // specific block queries) but we can't do the fastpath "return all | |
1398 | // results from the set". Clear out the indicator for this. | |
1399 | CacheInfo->Pair = BBSkipFirstBlockPair(); | |
1a4d82fc | 1400 | |
223e47cc LB |
1401 | // If *nothing* works, mark the pointer as unknown. |
1402 | // | |
1403 | // If this is the magic first block, return this as a clobber of the whole | |
1404 | // incoming value. Since we can't phi translate to one of the predecessors, | |
1405 | // we have to bail out. | |
1406 | if (SkipFirstBlock) | |
1407 | return true; | |
1a4d82fc | 1408 | |
223e47cc LB |
1409 | for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { |
1410 | assert(I != Cache->rend() && "Didn't find current block??"); | |
1411 | if (I->getBB() != BB) | |
1412 | continue; | |
1a4d82fc | 1413 | |
85aaf69f | 1414 | assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) && |
223e47cc LB |
1415 | "Should only be here with transparent block"); |
1416 | I->setResult(MemDepResult::getUnknown()); | |
1417 | Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), | |
1418 | Pointer.getAddr())); | |
1419 | break; | |
1420 | } | |
1421 | } | |
1422 | ||
1423 | // Okay, we're done now. If we added new values to the cache, re-sort it. | |
1424 | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); | |
1425 | DEBUG(AssertSorted(*Cache)); | |
1426 | return false; | |
1427 | } | |
1428 | ||
1429 | /// RemoveCachedNonLocalPointerDependencies - If P exists in | |
1430 | /// CachedNonLocalPointerInfo, remove it. | |
1431 | void MemoryDependenceAnalysis:: | |
1432 | RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { | |
1a4d82fc | 1433 | CachedNonLocalPointerInfo::iterator It = |
223e47cc LB |
1434 | NonLocalPointerDeps.find(P); |
1435 | if (It == NonLocalPointerDeps.end()) return; | |
1a4d82fc | 1436 | |
223e47cc LB |
1437 | // Remove all of the entries in the BB->val map. This involves removing |
1438 | // instructions from the reverse map. | |
1439 | NonLocalDepInfo &PInfo = It->second.NonLocalDeps; | |
1a4d82fc | 1440 | |
223e47cc LB |
1441 | for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { |
1442 | Instruction *Target = PInfo[i].getResult().getInst(); | |
1a4d82fc | 1443 | if (!Target) continue; // Ignore non-local dep results. |
223e47cc | 1444 | assert(Target->getParent() == PInfo[i].getBB()); |
1a4d82fc | 1445 | |
223e47cc LB |
1446 | // We are eliminating this entry from 'Cache', so update the reverse info.
1447 | RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); | |
1448 | } | |
1a4d82fc | 1449 | |
223e47cc LB |
1450 | // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). |
1451 | NonLocalPointerDeps.erase(It); | |
1452 | } | |
1453 | ||
1454 | ||
1455 | /// invalidateCachedPointerInfo - This method is used to invalidate cached | |
1456 | /// information about the specified pointer, because it may be too | |
1457 | /// conservative in memdep. This is an optional call that can be used when | |
1458 | /// the client detects an equivalence between the pointer and some other | |
1459 | /// value and replaces the other value with Ptr. This can make Ptr available
1460 | /// in more places than the cached info necessarily reflects.
1461 | void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { | |
1462 | // If Ptr isn't really a pointer, just ignore it. | |
1463 | if (!Ptr->getType()->isPointerTy()) return; | |
1464 | // Flush store info for the pointer. | |
1465 | RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); | |
1466 | // Flush load info for the pointer. | |
1467 | RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); | |
1468 | } | |
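// A minimal client-side sketch (not part of this file): after proving two
// pointers equivalent and rewriting uses, a pass holding MD, a
// MemoryDependenceAnalysis*, might do:
//
//   Old->replaceAllUsesWith(New);
//   MD->invalidateCachedPointerInfo(New);  // drop possibly-stale cached deps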
1469 | ||
1470 | /// invalidateCachedPredecessors - Clear the PredIteratorCache info. | |
1471 | /// This needs to be done when the CFG changes, e.g., due to splitting | |
1472 | /// critical edges. | |
1473 | void MemoryDependenceAnalysis::invalidateCachedPredecessors() { | |
1474 | PredCache->clear(); | |
1475 | } | |
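// Sketch of the intended call site (client code, assuming MD points at this
// analysis): after a CFG edit such as splitting a critical edge,
//
//   SplitCriticalEdge(TI, SuccNum, this);
//   MD->invalidateCachedPredecessors();    // cached pred lists may be stale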
1476 | ||
1477 | /// removeInstruction - Remove an instruction from the dependence analysis, | |
1478 | /// updating the dependence of instructions that previously depended on it. | |
1479 | /// This method attempts to keep the cache coherent using the reverse map. | |
1480 | void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { | |
1481 | // Walk through the Non-local dependencies, removing this one as the value | |
1482 | // for any cached queries. | |
1483 | NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); | |
1484 | if (NLDI != NonLocalDeps.end()) { | |
1485 | NonLocalDepInfo &BlockMap = NLDI->second.first; | |
1486 | for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); | |
1487 | DI != DE; ++DI) | |
1488 | if (Instruction *Inst = DI->getResult().getInst()) | |
1489 | RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); | |
1490 | NonLocalDeps.erase(NLDI); | |
1491 | } | |
1492 | ||
1493 | // If we have a cached local dependence query for this instruction, remove it. | |
1494 | // | |
1495 | LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); | |
1496 | if (LocalDepEntry != LocalDeps.end()) { | |
1497 | // Remove us from DepInst's reverse set now that the local dep info is gone. | |
1498 | if (Instruction *Inst = LocalDepEntry->second.getInst()) | |
1499 | RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); | |
1500 | ||
1501 | // Remove this local dependency info. | |
1502 | LocalDeps.erase(LocalDepEntry); | |
1503 | } | |
1a4d82fc | 1504 | |
223e47cc LB |
1505 | // If we have any cached pointer dependencies on this instruction, remove |
1506 | // them. If the instruction has non-pointer type, then it can't be a pointer | |
1507 | // base. | |
1a4d82fc | 1508 | |
223e47cc LB |
1509 | // Remove it from both the load info and the store info. The instruction |
1510 | // can't be in either of these maps if it is non-pointer. | |
1511 | if (RemInst->getType()->isPointerTy()) { | |
1512 | RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); | |
1513 | RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); | |
1514 | } | |
1a4d82fc | 1515 | |
223e47cc | 1516 | // Loop over all of the things that depend on the instruction we're removing. |
1a4d82fc | 1517 | // |
223e47cc LB |
1518 | SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; |
1519 | ||
1520 | // If we find RemInst as a clobber or Def in any of the maps for other values, | |
1521 | // we need to replace its entry with a dirty version of the instruction after | |
1522 | // it. If RemInst is a terminator, we use a null dirty value. | |
1523 | // | |
1524 | // Using a dirty version of the instruction after RemInst saves having to scan | |
1525 | // the entire block to get to this point. | |
1526 | MemDepResult NewDirtyVal; | |
1527 | if (!RemInst->isTerminator()) | |
1528 | NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); | |
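// For example (hypothetical block containing "S1; S2; S3"): when S2 is
// removed, any cached result that named S2 becomes dirty at S3, so a later
// query rescans upward starting just after the removed instruction rather
// than from the end of the block.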
1a4d82fc | 1529 | |
223e47cc LB |
1530 | ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); |
1531 | if (ReverseDepIt != ReverseLocalDeps.end()) { | |
223e47cc | 1532 | // RemInst can't be the terminator if it has local stuff depending on it. |
1a4d82fc | 1533 | assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) && |
223e47cc | 1534 | "Nothing can locally depend on a terminator"); |
1a4d82fc JJ |
1535 | |
1536 | for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) { | |
223e47cc LB |
1537 | assert(InstDependingOnRemInst != RemInst && |
1538 | "Already removed our local dep info"); | |
1a4d82fc | 1539 | |
223e47cc | 1540 | LocalDeps[InstDependingOnRemInst] = NewDirtyVal; |
1a4d82fc | 1541 | |
223e47cc LB |
1542 | // Make sure to remember that new things depend on NewDirtyVal's instruction.
1543 | assert(NewDirtyVal.getInst() && "There is no way something else can have " | |
1544 | "a local dep on this if it is a terminator!"); | |
1a4d82fc | 1545 | ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), |
223e47cc LB |
1546 | InstDependingOnRemInst)); |
1547 | } | |
1a4d82fc | 1548 | |
223e47cc LB |
1549 | ReverseLocalDeps.erase(ReverseDepIt); |
1550 | ||
1551 | // Add new reverse deps after scanning the set, to avoid invalidating the | |
1552 | // 'ReverseDeps' reference. | |
1553 | while (!ReverseDepsToAdd.empty()) { | |
1554 | ReverseLocalDeps[ReverseDepsToAdd.back().first] | |
1555 | .insert(ReverseDepsToAdd.back().second); | |
1556 | ReverseDepsToAdd.pop_back(); | |
1557 | } | |
1558 | } | |
1a4d82fc | 1559 | |
223e47cc LB |
1560 | ReverseDepIt = ReverseNonLocalDeps.find(RemInst); |
1561 | if (ReverseDepIt != ReverseNonLocalDeps.end()) { | |
1a4d82fc JJ |
1562 | for (Instruction *I : ReverseDepIt->second) { |
1563 | assert(I != RemInst && "Already removed NonLocalDep info for RemInst"); | |
1564 | ||
1565 | PerInstNLInfo &INLD = NonLocalDeps[I]; | |
223e47cc LB |
1566 | // The information is now dirty! |
1567 | INLD.second = true; | |
1a4d82fc JJ |
1568 | |
1569 | for (NonLocalDepInfo::iterator DI = INLD.first.begin(), | |
223e47cc LB |
1570 | DE = INLD.first.end(); DI != DE; ++DI) { |
1571 | if (DI->getResult().getInst() != RemInst) continue; | |
1a4d82fc | 1572 | |
223e47cc LB |
1573 | // Convert to a dirty entry for the subsequent instruction. |
1574 | DI->setResult(NewDirtyVal); | |
1a4d82fc | 1575 | |
223e47cc | 1576 | if (Instruction *NextI = NewDirtyVal.getInst()) |
1a4d82fc | 1577 | ReverseDepsToAdd.push_back(std::make_pair(NextI, I)); |
223e47cc LB |
1578 | } |
1579 | } | |
1580 | ||
1581 | ReverseNonLocalDeps.erase(ReverseDepIt); | |
1582 | ||
1583 | // Add new reverse deps after scanning the set, to avoid invalidating it.
1584 | while (!ReverseDepsToAdd.empty()) { | |
1585 | ReverseNonLocalDeps[ReverseDepsToAdd.back().first] | |
1586 | .insert(ReverseDepsToAdd.back().second); | |
1587 | ReverseDepsToAdd.pop_back(); | |
1588 | } | |
1589 | } | |
1a4d82fc | 1590 | |
223e47cc LB |
1591 | // If the instruction is in ReverseNonLocalPtrDeps, then it appears as a
1592 | // value in the NonLocalPointerDeps info. | |
1593 | ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = | |
1594 | ReverseNonLocalPtrDeps.find(RemInst); | |
1595 | if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { | |
223e47cc | 1596 | SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; |
1a4d82fc JJ |
1597 | |
1598 | for (ValueIsLoadPair P : ReversePtrDepIt->second) { | |
223e47cc LB |
1599 | assert(P.getPointer() != RemInst && |
1600 | "Already removed NonLocalPointerDeps info for RemInst"); | |
1a4d82fc | 1601 | |
223e47cc | 1602 | NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; |
1a4d82fc | 1603 | |
223e47cc LB |
1604 | // The cache is not valid for any specific block anymore. |
1605 | NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); | |
1a4d82fc | 1606 | |
223e47cc LB |
1607 | // Update any entries for RemInst to use the instruction after it. |
1608 | for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); | |
1609 | DI != DE; ++DI) { | |
1610 | if (DI->getResult().getInst() != RemInst) continue; | |
1a4d82fc | 1611 | |
223e47cc LB |
1612 | // Convert to a dirty entry for the subsequent instruction. |
1613 | DI->setResult(NewDirtyVal); | |
1a4d82fc | 1614 | |
223e47cc LB |
1615 | if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) |
1616 | ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); | |
1617 | } | |
1a4d82fc | 1618 | |
223e47cc LB |
1619 | // Re-sort the NonLocalDepInfo. Changing the dirty entry to its |
1620 | // subsequent value may invalidate the sortedness. | |
1621 | std::sort(NLPDI.begin(), NLPDI.end()); | |
1622 | } | |
1a4d82fc | 1623 | |
223e47cc | 1624 | ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); |
1a4d82fc | 1625 | |
223e47cc LB |
1626 | while (!ReversePtrDepsToAdd.empty()) { |
1627 | ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] | |
1628 | .insert(ReversePtrDepsToAdd.back().second); | |
1629 | ReversePtrDepsToAdd.pop_back(); | |
1630 | } | |
1631 | } | |
1a4d82fc JJ |
1632 | |
1633 | ||
223e47cc LB |
1634 | assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); |
1635 | AA->deleteValue(RemInst); | |
1636 | DEBUG(verifyRemoved(RemInst)); | |
1637 | } | |
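// Typical client usage (sketch, client code): remove the instruction from
// memdep before erasing the IR so the caches stay coherent, e.g.
//
//   MD->removeInstruction(I);
//   I->eraseFromParent();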
1638 | /// verifyRemoved - Verify that the specified instruction does not occur | |
1a4d82fc JJ |
1639 | /// in our internal data structures. The verification is done with
1640 | /// assertions, so it is active only in debug builds.
223e47cc | 1641 | void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { |
1a4d82fc | 1642 | #ifndef NDEBUG |
223e47cc LB |
1643 | for (LocalDepMapType::const_iterator I = LocalDeps.begin(), |
1644 | E = LocalDeps.end(); I != E; ++I) { | |
1645 | assert(I->first != D && "Inst occurs in data structures"); | |
1646 | assert(I->second.getInst() != D && | |
1647 | "Inst occurs in data structures"); | |
1648 | } | |
1a4d82fc | 1649 | |
223e47cc LB |
1650 | for (CachedNonLocalPointerInfo::const_iterator I = NonLocalPointerDeps.begin(),
1651 | E = NonLocalPointerDeps.end(); I != E; ++I) { | |
1652 | assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); | |
1653 | const NonLocalDepInfo &Val = I->second.NonLocalDeps; | |
1654 | for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); | |
1655 | II != E; ++II) | |
1656 | assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); | |
1657 | } | |
1a4d82fc | 1658 | |
223e47cc LB |
1659 | for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), |
1660 | E = NonLocalDeps.end(); I != E; ++I) { | |
1661 | assert(I->first != D && "Inst occurs in data structures"); | |
1662 | const PerInstNLInfo &INLD = I->second; | |
1663 | for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), | |
1664 | EE = INLD.first.end(); II != EE; ++II) | |
1665 | assert(II->getResult().getInst() != D && "Inst occurs in data structures"); | |
1666 | } | |
1a4d82fc | 1667 | |
223e47cc LB |
1668 | for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), |
1669 | E = ReverseLocalDeps.end(); I != E; ++I) { | |
1670 | assert(I->first != D && "Inst occurs in data structures"); | |
1a4d82fc JJ |
1671 | for (Instruction *Inst : I->second) |
1672 | assert(Inst != D && "Inst occurs in data structures"); | |
223e47cc | 1673 | } |
1a4d82fc | 1674 | |
223e47cc LB |
1675 | for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), |
1676 | E = ReverseNonLocalDeps.end(); | |
1677 | I != E; ++I) { | |
1678 | assert(I->first != D && "Inst occurs in data structures"); | |
1a4d82fc JJ |
1679 | for (Instruction *Inst : I->second) |
1680 | assert(Inst != D && "Inst occurs in data structures"); | |
223e47cc | 1681 | } |
1a4d82fc | 1682 | |
223e47cc LB |
1683 | for (ReverseNonLocalPtrDepTy::const_iterator |
1684 | I = ReverseNonLocalPtrDeps.begin(), | |
1685 | E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { | |
1686 | assert(I->first != D && "Inst occurs in rev NLPD map"); | |
1a4d82fc JJ |
1687 | |
1688 | for (ValueIsLoadPair P : I->second) | |
1689 | assert(P != ValueIsLoadPair(D, false) && | |
1690 | P != ValueIsLoadPair(D, true) && | |
223e47cc LB |
1691 | "Inst occurs in ReverseNonLocalPtrDeps map"); |
1692 | } | |
1a4d82fc | 1693 | #endif |
223e47cc | 1694 | } |