]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file contains a pass (at IR level) to replace atomic instructions with | |
11 | // either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. | |
12 | // | |
13 | //===----------------------------------------------------------------------===// | |
14 | ||
15 | #include "llvm/CodeGen/Passes.h" | |
16 | #include "llvm/IR/Function.h" | |
17 | #include "llvm/IR/IRBuilder.h" | |
18 | #include "llvm/IR/InstIterator.h" | |
19 | #include "llvm/IR/Instructions.h" | |
20 | #include "llvm/IR/Intrinsics.h" | |
21 | #include "llvm/IR/Module.h" | |
22 | #include "llvm/Support/Debug.h" | |
23 | #include "llvm/Target/TargetLowering.h" | |
24 | #include "llvm/Target/TargetMachine.h" | |
25 | #include "llvm/Target/TargetSubtargetInfo.h" | |
26 | ||
27 | using namespace llvm; | |
28 | ||
29 | #define DEBUG_TYPE "atomic-expand" | |
30 | ||
31 | namespace { | |
32 | class AtomicExpand: public FunctionPass { | |
33 | const TargetMachine *TM; | |
34 | public: | |
35 | static char ID; // Pass identification, replacement for typeid | |
36 | explicit AtomicExpand(const TargetMachine *TM = nullptr) | |
37 | : FunctionPass(ID), TM(TM) { | |
38 | initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); | |
39 | } | |
40 | ||
41 | bool runOnFunction(Function &F) override; | |
42 | ||
43 | private: | |
44 | bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, | |
45 | bool IsStore, bool IsLoad); | |
46 | bool expandAtomicLoad(LoadInst *LI); | |
47 | bool expandAtomicLoadToLL(LoadInst *LI); | |
48 | bool expandAtomicLoadToCmpXchg(LoadInst *LI); | |
49 | bool expandAtomicStore(StoreInst *SI); | |
50 | bool expandAtomicRMW(AtomicRMWInst *AI); | |
51 | bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); | |
52 | bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); | |
53 | bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); | |
54 | bool isIdempotentRMW(AtomicRMWInst *AI); | |
55 | bool simplifyIdempotentRMW(AtomicRMWInst *AI); | |
56 | }; | |
57 | } | |
58 | ||
59 | char AtomicExpand::ID = 0; | |
60 | char &llvm::AtomicExpandID = AtomicExpand::ID; | |
61 | INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", | |
62 | "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", | |
63 | false, false) | |
64 | ||
65 | FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { | |
66 | return new AtomicExpand(TM); | |
67 | } | |
68 | ||
69 | bool AtomicExpand::runOnFunction(Function &F) { | |
70 | if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand()) | |
71 | return false; | |
72 | auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); | |
73 | ||
74 | SmallVector<Instruction *, 1> AtomicInsts; | |
75 | ||
76 | // Changing control-flow while iterating through it is a bad idea, so gather a | |
77 | // list of all atomic instructions before we start. | |
78 | for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { | |
79 | if (I->isAtomic()) | |
80 | AtomicInsts.push_back(&*I); | |
81 | } | |
82 | ||
83 | bool MadeChange = false; | |
84 | for (auto I : AtomicInsts) { | |
85 | auto LI = dyn_cast<LoadInst>(I); | |
86 | auto SI = dyn_cast<StoreInst>(I); | |
87 | auto RMWI = dyn_cast<AtomicRMWInst>(I); | |
88 | auto CASI = dyn_cast<AtomicCmpXchgInst>(I); | |
89 | assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) && | |
90 | "Unknown atomic instruction"); | |
91 | ||
92 | auto FenceOrdering = Monotonic; | |
93 | bool IsStore, IsLoad; | |
94 | if (TargetLowering->getInsertFencesForAtomic()) { | |
95 | if (LI && isAtLeastAcquire(LI->getOrdering())) { | |
96 | FenceOrdering = LI->getOrdering(); | |
97 | LI->setOrdering(Monotonic); | |
98 | IsStore = false; | |
99 | IsLoad = true; | |
100 | } else if (SI && isAtLeastRelease(SI->getOrdering())) { | |
101 | FenceOrdering = SI->getOrdering(); | |
102 | SI->setOrdering(Monotonic); | |
103 | IsStore = true; | |
104 | IsLoad = false; | |
105 | } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || | |
106 | isAtLeastAcquire(RMWI->getOrdering()))) { | |
107 | FenceOrdering = RMWI->getOrdering(); | |
108 | RMWI->setOrdering(Monotonic); | |
109 | IsStore = IsLoad = true; | |
110 | } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && | |
111 | (isAtLeastRelease(CASI->getSuccessOrdering()) || | |
112 | isAtLeastAcquire(CASI->getSuccessOrdering()))) { | |
113 | // If a compare and swap is lowered to LL/SC, we can do smarter fence | |
114 | // insertion, with a stronger one on the success path than on the | |
115 | // failure path. As a result, fence insertion is directly done by | |
116 | // expandAtomicCmpXchg in that case. | |
117 | FenceOrdering = CASI->getSuccessOrdering(); | |
118 | CASI->setSuccessOrdering(Monotonic); | |
119 | CASI->setFailureOrdering(Monotonic); | |
120 | IsStore = IsLoad = true; | |
121 | } | |
122 | ||
123 | if (FenceOrdering != Monotonic) { | |
124 | MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); | |
125 | } | |
126 | } | |
127 | ||
128 | if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { | |
129 | MadeChange |= expandAtomicLoad(LI); | |
130 | } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { | |
131 | MadeChange |= expandAtomicStore(SI); | |
132 | } else if (RMWI) { | |
133 | // There are two different ways of expanding RMW instructions: | |
134 | // - into a load if it is idempotent | |
135 | // - into a Cmpxchg/LL-SC loop otherwise | |
136 | // we try them in that order. | |
137 | MadeChange |= (isIdempotentRMW(RMWI) && | |
138 | simplifyIdempotentRMW(RMWI)) || | |
139 | (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && | |
140 | expandAtomicRMW(RMWI)); | |
141 | } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { | |
142 | MadeChange |= expandAtomicCmpXchg(CASI); | |
143 | } | |
144 | } | |
145 | return MadeChange; | |
146 | } | |
147 | ||
148 | bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, | |
149 | bool IsStore, bool IsLoad) { | |
150 | IRBuilder<> Builder(I); | |
151 | ||
152 | auto LeadingFence = | |
153 | TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( | |
154 | Builder, Order, IsStore, IsLoad); | |
155 | ||
156 | auto TrailingFence = | |
157 | TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( | |
158 | Builder, Order, IsStore, IsLoad); | |
159 | // The trailing fence is emitted before the instruction instead of after | |
160 | // because there is no easy way of setting Builder insertion point after | |
161 | // an instruction. So we must erase it from the BB, and insert it back | |
162 | // in the right place. | |
163 | // We have a guard here because not every atomic operation generates a | |
164 | // trailing fence. | |
165 | if (TrailingFence) { | |
166 | TrailingFence->removeFromParent(); | |
167 | TrailingFence->insertAfter(I); | |
168 | } | |
169 | ||
170 | return (LeadingFence || TrailingFence); | |
171 | } | |
172 | ||
173 | bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { | |
174 | if (TM->getSubtargetImpl() | |
175 | ->getTargetLowering() | |
176 | ->hasLoadLinkedStoreConditional()) | |
177 | return expandAtomicLoadToLL(LI); | |
178 | else | |
179 | return expandAtomicLoadToCmpXchg(LI); | |
180 | } | |
181 | ||
182 | bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { | |
183 | auto TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
184 | IRBuilder<> Builder(LI); | |
185 | ||
186 | // On some architectures, load-linked instructions are atomic for larger | |
187 | // sizes than normal loads. For example, the only 64-bit load guaranteed | |
188 | // to be single-copy atomic by ARM is an ldrexd (A3.5.3). | |
189 | Value *Val = | |
190 | TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); | |
191 | ||
192 | LI->replaceAllUsesWith(Val); | |
193 | LI->eraseFromParent(); | |
194 | ||
195 | return true; | |
196 | } | |
197 | ||
198 | bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { | |
199 | IRBuilder<> Builder(LI); | |
200 | AtomicOrdering Order = LI->getOrdering(); | |
201 | Value *Addr = LI->getPointerOperand(); | |
202 | Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); | |
203 | Constant *DummyVal = Constant::getNullValue(Ty); | |
204 | ||
205 | Value *Pair = Builder.CreateAtomicCmpXchg( | |
206 | Addr, DummyVal, DummyVal, Order, | |
207 | AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); | |
208 | Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); | |
209 | ||
210 | LI->replaceAllUsesWith(Loaded); | |
211 | LI->eraseFromParent(); | |
212 | ||
213 | return true; | |
214 | } | |
215 | ||
216 | bool AtomicExpand::expandAtomicStore(StoreInst *SI) { | |
217 | // This function is only called on atomic stores that are too large to be | |
218 | // atomic if implemented as a native store. So we replace them by an | |
219 | // atomic swap, that can be implemented for example as a ldrex/strex on ARM | |
220 | // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. | |
221 | // It is the responsibility of the target to only return true in | |
222 | // shouldExpandAtomicRMW in cases where this is required and possible. | |
223 | IRBuilder<> Builder(SI); | |
224 | AtomicRMWInst *AI = | |
225 | Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), | |
226 | SI->getValueOperand(), SI->getOrdering()); | |
227 | SI->eraseFromParent(); | |
228 | ||
229 | // Now we have an appropriate swap instruction, lower it as usual. | |
230 | return expandAtomicRMW(AI); | |
231 | } | |
232 | ||
233 | bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { | |
234 | if (TM->getSubtargetImpl() | |
235 | ->getTargetLowering() | |
236 | ->hasLoadLinkedStoreConditional()) | |
237 | return expandAtomicRMWToLLSC(AI); | |
238 | else | |
239 | return expandAtomicRMWToCmpXchg(AI); | |
240 | } | |
241 | ||
242 | /// Emit IR to implement the given atomicrmw operation on values in registers, | |
243 | /// returning the new value. | |
244 | static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, | |
245 | Value *Loaded, Value *Inc) { | |
246 | Value *NewVal; | |
247 | switch (Op) { | |
248 | case AtomicRMWInst::Xchg: | |
249 | return Inc; | |
250 | case AtomicRMWInst::Add: | |
251 | return Builder.CreateAdd(Loaded, Inc, "new"); | |
252 | case AtomicRMWInst::Sub: | |
253 | return Builder.CreateSub(Loaded, Inc, "new"); | |
254 | case AtomicRMWInst::And: | |
255 | return Builder.CreateAnd(Loaded, Inc, "new"); | |
256 | case AtomicRMWInst::Nand: | |
257 | return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); | |
258 | case AtomicRMWInst::Or: | |
259 | return Builder.CreateOr(Loaded, Inc, "new"); | |
260 | case AtomicRMWInst::Xor: | |
261 | return Builder.CreateXor(Loaded, Inc, "new"); | |
262 | case AtomicRMWInst::Max: | |
263 | NewVal = Builder.CreateICmpSGT(Loaded, Inc); | |
264 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |
265 | case AtomicRMWInst::Min: | |
266 | NewVal = Builder.CreateICmpSLE(Loaded, Inc); | |
267 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |
268 | case AtomicRMWInst::UMax: | |
269 | NewVal = Builder.CreateICmpUGT(Loaded, Inc); | |
270 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |
271 | case AtomicRMWInst::UMin: | |
272 | NewVal = Builder.CreateICmpULE(Loaded, Inc); | |
273 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |
274 | default: | |
275 | llvm_unreachable("Unknown atomic op"); | |
276 | } | |
277 | } | |
278 | ||
279 | bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { | |
280 | auto TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
281 | AtomicOrdering MemOpOrder = AI->getOrdering(); | |
282 | Value *Addr = AI->getPointerOperand(); | |
283 | BasicBlock *BB = AI->getParent(); | |
284 | Function *F = BB->getParent(); | |
285 | LLVMContext &Ctx = F->getContext(); | |
286 | ||
287 | // Given: atomicrmw some_op iN* %addr, iN %incr ordering | |
288 | // | |
289 | // The standard expansion we produce is: | |
290 | // [...] | |
291 | // fence? | |
292 | // atomicrmw.start: | |
293 | // %loaded = @load.linked(%addr) | |
294 | // %new = some_op iN %loaded, %incr | |
295 | // %stored = @store_conditional(%new, %addr) | |
296 | // %try_again = icmp i32 ne %stored, 0 | |
297 | // br i1 %try_again, label %loop, label %atomicrmw.end | |
298 | // atomicrmw.end: | |
299 | // fence? | |
300 | // [...] | |
301 | BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); | |
302 | BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); | |
303 | ||
304 | // This grabs the DebugLoc from AI. | |
305 | IRBuilder<> Builder(AI); | |
306 | ||
307 | // The split call above "helpfully" added a branch at the end of BB (to the | |
308 | // wrong place), but we might want a fence too. It's easiest to just remove | |
309 | // the branch entirely. | |
310 | std::prev(BB->end())->eraseFromParent(); | |
311 | Builder.SetInsertPoint(BB); | |
312 | Builder.CreateBr(LoopBB); | |
313 | ||
314 | // Start the main loop block now that we've taken care of the preliminaries. | |
315 | Builder.SetInsertPoint(LoopBB); | |
316 | Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); | |
317 | ||
318 | Value *NewVal = | |
319 | performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); | |
320 | ||
321 | Value *StoreSuccess = | |
322 | TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); | |
323 | Value *TryAgain = Builder.CreateICmpNE( | |
324 | StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); | |
325 | Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); | |
326 | ||
327 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |
328 | ||
329 | AI->replaceAllUsesWith(Loaded); | |
330 | AI->eraseFromParent(); | |
331 | ||
332 | return true; | |
333 | } | |
334 | ||
335 | bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { | |
336 | AtomicOrdering MemOpOrder = | |
337 | AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); | |
338 | Value *Addr = AI->getPointerOperand(); | |
339 | BasicBlock *BB = AI->getParent(); | |
340 | Function *F = BB->getParent(); | |
341 | LLVMContext &Ctx = F->getContext(); | |
342 | ||
343 | // Given: atomicrmw some_op iN* %addr, iN %incr ordering | |
344 | // | |
345 | // The standard expansion we produce is: | |
346 | // [...] | |
347 | // %init_loaded = load atomic iN* %addr | |
348 | // br label %loop | |
349 | // loop: | |
350 | // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] | |
351 | // %new = some_op iN %loaded, %incr | |
352 | // %pair = cmpxchg iN* %addr, iN %loaded, iN %new | |
353 | // %new_loaded = extractvalue { iN, i1 } %pair, 0 | |
354 | // %success = extractvalue { iN, i1 } %pair, 1 | |
355 | // br i1 %success, label %atomicrmw.end, label %loop | |
356 | // atomicrmw.end: | |
357 | // [...] | |
358 | BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); | |
359 | BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); | |
360 | ||
361 | // This grabs the DebugLoc from AI. | |
362 | IRBuilder<> Builder(AI); | |
363 | ||
364 | // The split call above "helpfully" added a branch at the end of BB (to the | |
365 | // wrong place), but we want a load. It's easiest to just remove | |
366 | // the branch entirely. | |
367 | std::prev(BB->end())->eraseFromParent(); | |
368 | Builder.SetInsertPoint(BB); | |
369 | LoadInst *InitLoaded = Builder.CreateLoad(Addr); | |
370 | // Atomics require at least natural alignment. | |
371 | InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); | |
372 | Builder.CreateBr(LoopBB); | |
373 | ||
374 | // Start the main loop block now that we've taken care of the preliminaries. | |
375 | Builder.SetInsertPoint(LoopBB); | |
376 | PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); | |
377 | Loaded->addIncoming(InitLoaded, BB); | |
378 | ||
379 | Value *NewVal = | |
380 | performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); | |
381 | ||
382 | Value *Pair = Builder.CreateAtomicCmpXchg( | |
383 | Addr, Loaded, NewVal, MemOpOrder, | |
384 | AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); | |
385 | Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | |
386 | Loaded->addIncoming(NewLoaded, LoopBB); | |
387 | ||
388 | Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); | |
389 | Builder.CreateCondBr(Success, ExitBB, LoopBB); | |
390 | ||
391 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |
392 | ||
393 | AI->replaceAllUsesWith(NewLoaded); | |
394 | AI->eraseFromParent(); | |
395 | ||
396 | return true; | |
397 | } | |
398 | ||
399 | bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | |
400 | auto TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
401 | AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); | |
402 | AtomicOrdering FailureOrder = CI->getFailureOrdering(); | |
403 | Value *Addr = CI->getPointerOperand(); | |
404 | BasicBlock *BB = CI->getParent(); | |
405 | Function *F = BB->getParent(); | |
406 | LLVMContext &Ctx = F->getContext(); | |
407 | // If getInsertFencesForAtomic() returns true, then the target does not want | |
408 | // to deal with memory orders, and emitLeading/TrailingFence should take care | |
409 | // of everything. Otherwise, emitLeading/TrailingFence are no-op and we | |
410 | // should preserve the ordering. | |
411 | AtomicOrdering MemOpOrder = | |
412 | TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; | |
413 | ||
414 | // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord | |
415 | // | |
416 | // The full expansion we produce is: | |
417 | // [...] | |
418 | // fence? | |
419 | // cmpxchg.start: | |
420 | // %loaded = @load.linked(%addr) | |
421 | // %should_store = icmp eq %loaded, %desired | |
422 | // br i1 %should_store, label %cmpxchg.trystore, | |
423 | // label %cmpxchg.failure | |
424 | // cmpxchg.trystore: | |
425 | // %stored = @store_conditional(%new, %addr) | |
426 | // %success = icmp eq i32 %stored, 0 | |
427 | // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure | |
428 | // cmpxchg.success: | |
429 | // fence? | |
430 | // br label %cmpxchg.end | |
431 | // cmpxchg.failure: | |
432 | // fence? | |
433 | // br label %cmpxchg.end | |
434 | // cmpxchg.end: | |
435 | // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] | |
436 | // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 | |
437 | // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 | |
438 | // [...] | |
439 | BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); | |
440 | auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); | |
441 | auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); | |
442 | auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); | |
443 | auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); | |
444 | ||
445 | // This grabs the DebugLoc from CI | |
446 | IRBuilder<> Builder(CI); | |
447 | ||
448 | // The split call above "helpfully" added a branch at the end of BB (to the | |
449 | // wrong place), but we might want a fence too. It's easiest to just remove | |
450 | // the branch entirely. | |
451 | std::prev(BB->end())->eraseFromParent(); | |
452 | Builder.SetInsertPoint(BB); | |
453 | TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, | |
454 | /*IsLoad=*/true); | |
455 | Builder.CreateBr(LoopBB); | |
456 | ||
457 | // Start the main loop block now that we've taken care of the preliminaries. | |
458 | Builder.SetInsertPoint(LoopBB); | |
459 | Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); | |
460 | Value *ShouldStore = | |
461 | Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); | |
462 | ||
463 | // If the the cmpxchg doesn't actually need any ordering when it fails, we can | |
464 | // jump straight past that fence instruction (if it exists). | |
465 | Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); | |
466 | ||
467 | Builder.SetInsertPoint(TryStoreBB); | |
468 | Value *StoreSuccess = TLI->emitStoreConditional( | |
469 | Builder, CI->getNewValOperand(), Addr, MemOpOrder); | |
470 | StoreSuccess = Builder.CreateICmpEQ( | |
471 | StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); | |
472 | Builder.CreateCondBr(StoreSuccess, SuccessBB, | |
473 | CI->isWeak() ? FailureBB : LoopBB); | |
474 | ||
475 | // Make sure later instructions don't get reordered with a fence if necessary. | |
476 | Builder.SetInsertPoint(SuccessBB); | |
477 | TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, | |
478 | /*IsLoad=*/true); | |
479 | Builder.CreateBr(ExitBB); | |
480 | ||
481 | Builder.SetInsertPoint(FailureBB); | |
482 | TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, | |
483 | /*IsLoad=*/true); | |
484 | Builder.CreateBr(ExitBB); | |
485 | ||
486 | // Finally, we have control-flow based knowledge of whether the cmpxchg | |
487 | // succeeded or not. We expose this to later passes by converting any | |
488 | // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. | |
489 | ||
490 | // Setup the builder so we can create any PHIs we need. | |
491 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |
492 | PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); | |
493 | Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); | |
494 | Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); | |
495 | ||
496 | // Look for any users of the cmpxchg that are just comparing the loaded value | |
497 | // against the desired one, and replace them with the CFG-derived version. | |
498 | SmallVector<ExtractValueInst *, 2> PrunedInsts; | |
499 | for (auto User : CI->users()) { | |
500 | ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); | |
501 | if (!EV) | |
502 | continue; | |
503 | ||
504 | assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && | |
505 | "weird extraction from { iN, i1 }"); | |
506 | ||
507 | if (EV->getIndices()[0] == 0) | |
508 | EV->replaceAllUsesWith(Loaded); | |
509 | else | |
510 | EV->replaceAllUsesWith(Success); | |
511 | ||
512 | PrunedInsts.push_back(EV); | |
513 | } | |
514 | ||
515 | // We can remove the instructions now we're no longer iterating through them. | |
516 | for (auto EV : PrunedInsts) | |
517 | EV->eraseFromParent(); | |
518 | ||
519 | if (!CI->use_empty()) { | |
520 | // Some use of the full struct return that we don't understand has happened, | |
521 | // so we've got to reconstruct it properly. | |
522 | Value *Res; | |
523 | Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); | |
524 | Res = Builder.CreateInsertValue(Res, Success, 1); | |
525 | ||
526 | CI->replaceAllUsesWith(Res); | |
527 | } | |
528 | ||
529 | CI->eraseFromParent(); | |
530 | return true; | |
531 | } | |
532 | ||
533 | bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { | |
534 | auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); | |
535 | if(!C) | |
536 | return false; | |
537 | ||
538 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | |
539 | switch(Op) { | |
540 | case AtomicRMWInst::Add: | |
541 | case AtomicRMWInst::Sub: | |
542 | case AtomicRMWInst::Or: | |
543 | case AtomicRMWInst::Xor: | |
544 | return C->isZero(); | |
545 | case AtomicRMWInst::And: | |
546 | return C->isMinusOne(); | |
547 | // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... | |
548 | default: | |
549 | return false; | |
550 | } | |
551 | } | |
552 | ||
553 | bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { | |
554 | auto TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
555 | ||
556 | if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { | |
557 | if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) | |
558 | expandAtomicLoad(ResultingLoad); | |
559 | return true; | |
560 | } | |
561 | ||
562 | return false; | |
563 | } |