]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This pass identifies loops where we can generate the PPC branch instructions | |
11 | // that decrement and test the count register (CTR) (bdnz and friends). | |
223e47cc LB |
12 | // |
13 | // The pattern that defines the induction variable can changed depending on | |
14 | // prior optimizations. For example, the IndVarSimplify phase run by 'opt' | |
15 | // normalizes induction variables, and the Loop Strength Reduction pass | |
16 | // run by 'llc' may also make changes to the induction variable. | |
223e47cc LB |
17 | // |
18 | // Criteria for CTR loops: | |
19 | // - Countable loops (w/ ind. var for a trip count) | |
223e47cc LB |
20 | // - Try inner-most loops first |
21 | // - No nested CTR loops. | |
22 | // - No function calls in loops. | |
23 | // | |
223e47cc LB |
24 | //===----------------------------------------------------------------------===// |
25 | ||
1a4d82fc | 26 | #include "llvm/Transforms/Scalar.h" |
223e47cc | 27 | #include "PPC.h" |
970d7e83 | 28 | #include "PPCTargetMachine.h" |
1a4d82fc | 29 | #include "llvm/ADT/STLExtras.h" |
223e47cc | 30 | #include "llvm/ADT/Statistic.h" |
1a4d82fc JJ |
31 | #include "llvm/Analysis/LoopInfo.h" |
32 | #include "llvm/Analysis/ScalarEvolutionExpander.h" | |
970d7e83 | 33 | #include "llvm/IR/Constants.h" |
1a4d82fc JJ |
34 | #include "llvm/IR/DerivedTypes.h" |
35 | #include "llvm/IR/Dominators.h" | |
36 | #include "llvm/IR/InlineAsm.h" | |
37 | #include "llvm/IR/Instructions.h" | |
38 | #include "llvm/IR/IntrinsicInst.h" | |
39 | #include "llvm/IR/Module.h" | |
40 | #include "llvm/IR/ValueHandle.h" | |
970d7e83 | 41 | #include "llvm/PassSupport.h" |
1a4d82fc | 42 | #include "llvm/Support/CommandLine.h" |
223e47cc LB |
43 | #include "llvm/Support/Debug.h" |
44 | #include "llvm/Support/raw_ostream.h" | |
1a4d82fc JJ |
45 | #include "llvm/Target/TargetLibraryInfo.h" |
46 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |
47 | #include "llvm/Transforms/Utils/Local.h" | |
48 | #include "llvm/Transforms/Utils/LoopUtils.h" | |
49 | ||
50 | #ifndef NDEBUG | |
51 | #include "llvm/CodeGen/MachineDominators.h" | |
52 | #include "llvm/CodeGen/MachineFunction.h" | |
53 | #include "llvm/CodeGen/MachineFunctionPass.h" | |
54 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
55 | #endif | |
56 | ||
223e47cc | 57 | #include <algorithm> |
1a4d82fc | 58 | #include <vector> |
223e47cc LB |
59 | |
60 | using namespace llvm; | |
61 | ||
1a4d82fc JJ |
62 | #define DEBUG_TYPE "ctrloops" |
63 | ||
64 | #ifndef NDEBUG | |
65 | static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); | |
66 | #endif | |
67 | ||
223e47cc LB |
68 | STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); |
69 | ||
970d7e83 LB |
70 | namespace llvm { |
71 | void initializePPCCTRLoopsPass(PassRegistry&); | |
1a4d82fc JJ |
72 | #ifndef NDEBUG |
73 | void initializePPCCTRLoopsVerifyPass(PassRegistry&); | |
74 | #endif | |
970d7e83 LB |
75 | } |
76 | ||
223e47cc | 77 | namespace { |
1a4d82fc JJ |
78 | struct PPCCTRLoops : public FunctionPass { |
79 | ||
80 | #ifndef NDEBUG | |
81 | static int Counter; | |
82 | #endif | |
223e47cc LB |
83 | |
84 | public: | |
1a4d82fc | 85 | static char ID; |
223e47cc | 86 | |
1a4d82fc JJ |
87 | PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { |
88 | initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); | |
89 | } | |
90 | PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { | |
970d7e83 LB |
91 | initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); |
92 | } | |
223e47cc | 93 | |
1a4d82fc | 94 | bool runOnFunction(Function &F) override; |
223e47cc | 95 | |
1a4d82fc JJ |
96 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
97 | AU.addRequired<LoopInfo>(); | |
98 | AU.addPreserved<LoopInfo>(); | |
99 | AU.addRequired<DominatorTreeWrapperPass>(); | |
100 | AU.addPreserved<DominatorTreeWrapperPass>(); | |
101 | AU.addRequired<ScalarEvolution>(); | |
223e47cc LB |
102 | } |
103 | ||
104 | private: | |
1a4d82fc JJ |
105 | bool mightUseCTR(const Triple &TT, BasicBlock *BB); |
106 | bool convertToCTRLoop(Loop *L); | |
107 | ||
108 | private: | |
109 | PPCTargetMachine *TM; | |
110 | LoopInfo *LI; | |
111 | ScalarEvolution *SE; | |
112 | const DataLayout *DL; | |
113 | DominatorTree *DT; | |
114 | const TargetLibraryInfo *LibInfo; | |
223e47cc LB |
115 | }; |
116 | ||
117 | char PPCCTRLoops::ID = 0; | |
1a4d82fc JJ |
118 | #ifndef NDEBUG |
119 | int PPCCTRLoops::Counter = 0; | |
120 | #endif | |
223e47cc | 121 | |
1a4d82fc JJ |
122 | #ifndef NDEBUG |
123 | struct PPCCTRLoopsVerify : public MachineFunctionPass { | |
223e47cc | 124 | public: |
1a4d82fc | 125 | static char ID; |
223e47cc | 126 | |
1a4d82fc JJ |
127 | PPCCTRLoopsVerify() : MachineFunctionPass(ID) { |
128 | initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); | |
223e47cc LB |
129 | } |
130 | ||
1a4d82fc JJ |
131 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
132 | AU.addRequired<MachineDominatorTree>(); | |
133 | MachineFunctionPass::getAnalysisUsage(AU); | |
223e47cc | 134 | } |
1a4d82fc JJ |
135 | |
136 | bool runOnMachineFunction(MachineFunction &MF) override; | |
137 | ||
138 | private: | |
139 | MachineDominatorTree *MDT; | |
223e47cc | 140 | }; |
1a4d82fc JJ |
141 | |
142 | char PPCCTRLoopsVerify::ID = 0; | |
143 | #endif // NDEBUG | |
223e47cc LB |
144 | } // end anonymous namespace |
145 | ||
970d7e83 LB |
146 | INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", |
147 | false, false) | |
1a4d82fc JJ |
148 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
149 | INITIALIZE_PASS_DEPENDENCY(LoopInfo) | |
150 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) | |
970d7e83 LB |
151 | INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", |
152 | false, false) | |
223e47cc | 153 | |
1a4d82fc JJ |
154 | FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { |
155 | return new PPCCTRLoops(TM); | |
223e47cc LB |
156 | } |
157 | ||
1a4d82fc JJ |
158 | #ifndef NDEBUG |
159 | INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", | |
160 | "PowerPC CTR Loops Verify", false, false) | |
161 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) | |
162 | INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", | |
163 | "PowerPC CTR Loops Verify", false, false) | |
223e47cc | 164 | |
1a4d82fc JJ |
165 | FunctionPass *llvm::createPPCCTRLoopsVerify() { |
166 | return new PPCCTRLoopsVerify(); | |
223e47cc | 167 | } |
1a4d82fc | 168 | #endif // NDEBUG |
223e47cc | 169 | |
1a4d82fc JJ |
170 | bool PPCCTRLoops::runOnFunction(Function &F) { |
171 | LI = &getAnalysis<LoopInfo>(); | |
172 | SE = &getAnalysis<ScalarEvolution>(); | |
173 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |
174 | DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); | |
175 | DL = DLP ? &DLP->getDataLayout() : nullptr; | |
176 | LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>(); | |
223e47cc | 177 | |
1a4d82fc | 178 | bool MadeChange = false; |
223e47cc | 179 | |
1a4d82fc | 180 | for (LoopInfo::iterator I = LI->begin(), E = LI->end(); |
223e47cc | 181 | I != E; ++I) { |
1a4d82fc JJ |
182 | Loop *L = *I; |
183 | if (!L->getParentLoop()) | |
184 | MadeChange |= convertToCTRLoop(L); | |
223e47cc LB |
185 | } |
186 | ||
1a4d82fc | 187 | return MadeChange; |
223e47cc LB |
188 | } |
189 | ||
1a4d82fc JJ |
190 | static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { |
191 | if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) | |
192 | return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); | |
193 | ||
194 | return false; | |
223e47cc LB |
195 | } |
196 | ||
85aaf69f SL |
197 | // Determining the address of a TLS variable results in a function call in |
198 | // certain TLS models. | |
199 | static bool memAddrUsesCTR(const PPCTargetMachine *TM, | |
200 | const llvm::Value *MemAddr) { | |
201 | const auto *GV = dyn_cast<GlobalValue>(MemAddr); | |
202 | if (!GV) | |
203 | return false; | |
204 | if (!GV->isThreadLocal()) | |
205 | return false; | |
206 | if (!TM) | |
207 | return true; | |
208 | TLSModel::Model Model = TM->getTLSModel(GV); | |
209 | return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; | |
210 | } | |
211 | ||
1a4d82fc JJ |
212 | bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { |
213 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); | |
214 | J != JE; ++J) { | |
215 | if (CallInst *CI = dyn_cast<CallInst>(J)) { | |
216 | if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { | |
217 | // Inline ASM is okay, unless it clobbers the ctr register. | |
218 | InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); | |
219 | for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { | |
220 | InlineAsm::ConstraintInfo &C = CIV[i]; | |
221 | if (C.Type != InlineAsm::isInput) | |
222 | for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) | |
223 | if (StringRef(C.Codes[j]).equals_lower("{ctr}")) | |
224 | return true; | |
225 | } | |
223e47cc | 226 | |
1a4d82fc JJ |
227 | continue; |
228 | } | |
223e47cc | 229 | |
1a4d82fc JJ |
230 | if (!TM) |
231 | return true; | |
232 | const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
233 | ||
234 | if (Function *F = CI->getCalledFunction()) { | |
235 | // Most intrinsics don't become function calls, but some might. | |
236 | // sin, cos, exp and log are always calls. | |
237 | unsigned Opcode; | |
238 | if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { | |
239 | switch (F->getIntrinsicID()) { | |
240 | default: continue; | |
241 | ||
242 | // VisualStudio defines setjmp as _setjmp | |
243 | #if defined(_MSC_VER) && defined(setjmp) && \ | |
244 | !defined(setjmp_undefined_for_msvc) | |
245 | # pragma push_macro("setjmp") | |
246 | # undef setjmp | |
247 | # define setjmp_undefined_for_msvc | |
248 | #endif | |
249 | ||
250 | case Intrinsic::setjmp: | |
251 | ||
252 | #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) | |
253 | // let's return it to _setjmp state | |
254 | # pragma pop_macro("setjmp") | |
255 | # undef setjmp_undefined_for_msvc | |
256 | #endif | |
257 | ||
258 | case Intrinsic::longjmp: | |
259 | ||
260 | // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp | |
261 | // because, although it does clobber the counter register, the | |
262 | // control can't then return to inside the loop unless there is also | |
263 | // an eh_sjlj_setjmp. | |
264 | case Intrinsic::eh_sjlj_setjmp: | |
265 | ||
266 | case Intrinsic::memcpy: | |
267 | case Intrinsic::memmove: | |
268 | case Intrinsic::memset: | |
269 | case Intrinsic::powi: | |
270 | case Intrinsic::log: | |
271 | case Intrinsic::log2: | |
272 | case Intrinsic::log10: | |
273 | case Intrinsic::exp: | |
274 | case Intrinsic::exp2: | |
275 | case Intrinsic::pow: | |
276 | case Intrinsic::sin: | |
277 | case Intrinsic::cos: | |
278 | return true; | |
279 | case Intrinsic::copysign: | |
280 | if (CI->getArgOperand(0)->getType()->getScalarType()-> | |
281 | isPPC_FP128Ty()) | |
282 | return true; | |
283 | else | |
284 | continue; // ISD::FCOPYSIGN is never a library call. | |
285 | case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; | |
286 | case Intrinsic::floor: Opcode = ISD::FFLOOR; break; | |
287 | case Intrinsic::ceil: Opcode = ISD::FCEIL; break; | |
288 | case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; | |
289 | case Intrinsic::rint: Opcode = ISD::FRINT; break; | |
290 | case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; | |
291 | case Intrinsic::round: Opcode = ISD::FROUND; break; | |
223e47cc | 292 | } |
1a4d82fc JJ |
293 | } |
294 | ||
295 | // PowerPC does not use [US]DIVREM or other library calls for | |
296 | // operations on regular types which are not otherwise library calls | |
297 | // (i.e. soft float or atomics). If adapting for targets that do, | |
298 | // additional care is required here. | |
299 | ||
300 | LibFunc::Func Func; | |
301 | if (!F->hasLocalLinkage() && F->hasName() && LibInfo && | |
302 | LibInfo->getLibFunc(F->getName(), Func) && | |
303 | LibInfo->hasOptimizedCodeGen(Func)) { | |
304 | // Non-read-only functions are never treated as intrinsics. | |
305 | if (!CI->onlyReadsMemory()) | |
306 | return true; | |
307 | ||
308 | // Conversion happens only for FP calls. | |
309 | if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) | |
310 | return true; | |
311 | ||
312 | switch (Func) { | |
313 | default: return true; | |
314 | case LibFunc::copysign: | |
315 | case LibFunc::copysignf: | |
316 | continue; // ISD::FCOPYSIGN is never a library call. | |
317 | case LibFunc::copysignl: | |
318 | return true; | |
319 | case LibFunc::fabs: | |
320 | case LibFunc::fabsf: | |
321 | case LibFunc::fabsl: | |
322 | continue; // ISD::FABS is never a library call. | |
323 | case LibFunc::sqrt: | |
324 | case LibFunc::sqrtf: | |
325 | case LibFunc::sqrtl: | |
326 | Opcode = ISD::FSQRT; break; | |
327 | case LibFunc::floor: | |
328 | case LibFunc::floorf: | |
329 | case LibFunc::floorl: | |
330 | Opcode = ISD::FFLOOR; break; | |
331 | case LibFunc::nearbyint: | |
332 | case LibFunc::nearbyintf: | |
333 | case LibFunc::nearbyintl: | |
334 | Opcode = ISD::FNEARBYINT; break; | |
335 | case LibFunc::ceil: | |
336 | case LibFunc::ceilf: | |
337 | case LibFunc::ceill: | |
338 | Opcode = ISD::FCEIL; break; | |
339 | case LibFunc::rint: | |
340 | case LibFunc::rintf: | |
341 | case LibFunc::rintl: | |
342 | Opcode = ISD::FRINT; break; | |
343 | case LibFunc::round: | |
344 | case LibFunc::roundf: | |
345 | case LibFunc::roundl: | |
346 | Opcode = ISD::FROUND; break; | |
347 | case LibFunc::trunc: | |
348 | case LibFunc::truncf: | |
349 | case LibFunc::truncl: | |
350 | Opcode = ISD::FTRUNC; break; | |
223e47cc | 351 | } |
1a4d82fc JJ |
352 | |
353 | MVT VTy = | |
354 | TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); | |
355 | if (VTy == MVT::Other) | |
356 | return true; | |
357 | ||
358 | if (TLI->isOperationLegalOrCustom(Opcode, VTy)) | |
359 | continue; | |
360 | else if (VTy.isVector() && | |
361 | TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) | |
362 | continue; | |
363 | ||
364 | return true; | |
223e47cc | 365 | } |
223e47cc | 366 | } |
1a4d82fc JJ |
367 | |
368 | return true; | |
369 | } else if (isa<BinaryOperator>(J) && | |
370 | J->getType()->getScalarType()->isPPC_FP128Ty()) { | |
371 | // Most operations on ppc_f128 values become calls. | |
372 | return true; | |
373 | } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || | |
374 | isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { | |
375 | CastInst *CI = cast<CastInst>(J); | |
376 | if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || | |
377 | CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || | |
378 | isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || | |
379 | isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) | |
380 | return true; | |
381 | } else if (isLargeIntegerTy(TT.isArch32Bit(), | |
382 | J->getType()->getScalarType()) && | |
383 | (J->getOpcode() == Instruction::UDiv || | |
384 | J->getOpcode() == Instruction::SDiv || | |
385 | J->getOpcode() == Instruction::URem || | |
386 | J->getOpcode() == Instruction::SRem)) { | |
387 | return true; | |
388 | } else if (TT.isArch32Bit() && | |
389 | isLargeIntegerTy(false, J->getType()->getScalarType()) && | |
390 | (J->getOpcode() == Instruction::Shl || | |
391 | J->getOpcode() == Instruction::AShr || | |
392 | J->getOpcode() == Instruction::LShr)) { | |
393 | // Only on PPC32, for 128-bit integers (specifically not 64-bit | |
394 | // integers), these might be runtime calls. | |
395 | return true; | |
396 | } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { | |
397 | // On PowerPC, indirect jumps use the counter register. | |
398 | return true; | |
399 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { | |
400 | if (!TM) | |
401 | return true; | |
402 | const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); | |
403 | ||
404 | if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) | |
405 | return true; | |
223e47cc | 406 | } |
85aaf69f SL |
407 | for (Value *Operand : J->operands()) |
408 | if (memAddrUsesCTR(TM, Operand)) | |
409 | return true; | |
223e47cc | 410 | } |
223e47cc | 411 | |
1a4d82fc | 412 | return false; |
223e47cc LB |
413 | } |
414 | ||
1a4d82fc JJ |
415 | bool PPCCTRLoops::convertToCTRLoop(Loop *L) { |
416 | bool MadeChange = false; | |
223e47cc | 417 | |
1a4d82fc JJ |
418 | Triple TT = Triple(L->getHeader()->getParent()->getParent()-> |
419 | getTargetTriple()); | |
420 | if (!TT.isArch32Bit() && !TT.isArch64Bit()) | |
421 | return MadeChange; // Unknown arch. type. | |
422 | ||
423 | // Process nested loops first. | |
424 | for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { | |
425 | MadeChange |= convertToCTRLoop(*I); | |
223e47cc | 426 | } |
1a4d82fc JJ |
427 | |
428 | // If a nested loop has been converted, then we can't convert this loop. | |
429 | if (MadeChange) | |
430 | return MadeChange; | |
431 | ||
432 | #ifndef NDEBUG | |
433 | // Stop trying after reaching the limit (if any). | |
434 | int Limit = CTRLoopLimit; | |
435 | if (Limit >= 0) { | |
436 | if (Counter >= CTRLoopLimit) | |
437 | return false; | |
438 | Counter++; | |
223e47cc | 439 | } |
1a4d82fc JJ |
440 | #endif |
441 | ||
442 | // We don't want to spill/restore the counter register, and so we don't | |
443 | // want to use the counter register if the loop contains calls. | |
444 | for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); | |
445 | I != IE; ++I) | |
446 | if (mightUseCTR(TT, *I)) | |
447 | return MadeChange; | |
448 | ||
449 | SmallVector<BasicBlock*, 4> ExitingBlocks; | |
450 | L->getExitingBlocks(ExitingBlocks); | |
451 | ||
452 | BasicBlock *CountedExitBlock = nullptr; | |
453 | const SCEV *ExitCount = nullptr; | |
454 | BranchInst *CountedExitBranch = nullptr; | |
455 | for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), | |
456 | IE = ExitingBlocks.end(); I != IE; ++I) { | |
457 | const SCEV *EC = SE->getExitCount(L, *I); | |
458 | DEBUG(dbgs() << "Exit Count for " << *L << " from block " << | |
459 | (*I)->getName() << ": " << *EC << "\n"); | |
460 | if (isa<SCEVCouldNotCompute>(EC)) | |
461 | continue; | |
462 | if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { | |
463 | if (ConstEC->getValue()->isZero()) | |
464 | continue; | |
465 | } else if (!SE->isLoopInvariant(EC, L)) | |
466 | continue; | |
467 | ||
468 | if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) | |
469 | continue; | |
470 | ||
471 | // We now have a loop-invariant count of loop iterations (which is not the | |
472 | // constant zero) for which we know that this loop will not exit via this | |
473 | // exisiting block. | |
474 | ||
475 | // We need to make sure that this block will run on every loop iteration. | |
476 | // For this to be true, we must dominate all blocks with backedges. Such | |
477 | // blocks are in-loop predecessors to the header block. | |
478 | bool NotAlways = false; | |
479 | for (pred_iterator PI = pred_begin(L->getHeader()), | |
480 | PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { | |
481 | if (!L->contains(*PI)) | |
482 | continue; | |
223e47cc | 483 | |
1a4d82fc JJ |
484 | if (!DT->dominates(*I, *PI)) { |
485 | NotAlways = true; | |
486 | break; | |
223e47cc LB |
487 | } |
488 | } | |
1a4d82fc JJ |
489 | |
490 | if (NotAlways) | |
491 | continue; | |
492 | ||
493 | // Make sure this blocks ends with a conditional branch. | |
494 | Instruction *TI = (*I)->getTerminator(); | |
495 | if (!TI) | |
496 | continue; | |
497 | ||
498 | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { | |
499 | if (!BI->isConditional()) | |
500 | continue; | |
501 | ||
502 | CountedExitBranch = BI; | |
503 | } else | |
504 | continue; | |
505 | ||
506 | // Note that this block may not be the loop latch block, even if the loop | |
507 | // has a latch block. | |
508 | CountedExitBlock = *I; | |
509 | ExitCount = EC; | |
510 | break; | |
223e47cc | 511 | } |
1a4d82fc JJ |
512 | |
513 | if (!CountedExitBlock) | |
514 | return MadeChange; | |
515 | ||
516 | BasicBlock *Preheader = L->getLoopPreheader(); | |
517 | ||
518 | // If we don't have a preheader, then insert one. If we already have a | |
519 | // preheader, then we can use it (except if the preheader contains a use of | |
520 | // the CTR register because some such uses might be reordered by the | |
521 | // selection DAG after the mtctr instruction). | |
522 | if (!Preheader || mightUseCTR(TT, Preheader)) | |
523 | Preheader = InsertPreheaderForLoop(L, this); | |
524 | if (!Preheader) | |
525 | return MadeChange; | |
526 | ||
527 | DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); | |
528 | ||
529 | // Insert the count into the preheader and replace the condition used by the | |
530 | // selected branch. | |
531 | MadeChange = true; | |
532 | ||
533 | SCEVExpander SCEVE(*SE, "loopcnt"); | |
534 | LLVMContext &C = SE->getContext(); | |
535 | Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : | |
536 | Type::getInt32Ty(C); | |
537 | if (!ExitCount->getType()->isPointerTy() && | |
538 | ExitCount->getType() != CountType) | |
539 | ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); | |
540 | ExitCount = SE->getAddExpr(ExitCount, | |
541 | SE->getConstant(CountType, 1)); | |
542 | Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, | |
543 | Preheader->getTerminator()); | |
544 | ||
545 | IRBuilder<> CountBuilder(Preheader->getTerminator()); | |
546 | Module *M = Preheader->getParent()->getParent(); | |
547 | Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, | |
548 | CountType); | |
549 | CountBuilder.CreateCall(MTCTRFunc, ECValue); | |
550 | ||
551 | IRBuilder<> CondBuilder(CountedExitBranch); | |
552 | Value *DecFunc = | |
553 | Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); | |
554 | Value *NewCond = CondBuilder.CreateCall(DecFunc); | |
555 | Value *OldCond = CountedExitBranch->getCondition(); | |
556 | CountedExitBranch->setCondition(NewCond); | |
557 | ||
558 | // The false branch must exit the loop. | |
559 | if (!L->contains(CountedExitBranch->getSuccessor(0))) | |
560 | CountedExitBranch->swapSuccessors(); | |
561 | ||
562 | // The old condition may be dead now, and may have even created a dead PHI | |
563 | // (the original induction variable). | |
564 | RecursivelyDeleteTriviallyDeadInstructions(OldCond); | |
565 | DeleteDeadPHIs(CountedExitBlock); | |
566 | ||
567 | ++NumCTRLoops; | |
568 | return MadeChange; | |
223e47cc LB |
569 | } |
570 | ||
1a4d82fc JJ |
571 | #ifndef NDEBUG |
572 | static bool clobbersCTR(const MachineInstr *MI) { | |
223e47cc LB |
573 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
574 | const MachineOperand &MO = MI->getOperand(i); | |
1a4d82fc JJ |
575 | if (MO.isReg()) { |
576 | if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) | |
577 | return true; | |
578 | } else if (MO.isRegMask()) { | |
579 | if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) | |
580 | return true; | |
223e47cc LB |
581 | } |
582 | } | |
583 | ||
1a4d82fc | 584 | return false; |
223e47cc LB |
585 | } |
586 | ||
1a4d82fc JJ |
587 | static bool verifyCTRBranch(MachineBasicBlock *MBB, |
588 | MachineBasicBlock::iterator I) { | |
589 | MachineBasicBlock::iterator BI = I; | |
590 | SmallSet<MachineBasicBlock *, 16> Visited; | |
591 | SmallVector<MachineBasicBlock *, 8> Preds; | |
592 | bool CheckPreds; | |
593 | ||
594 | if (I == MBB->begin()) { | |
595 | Visited.insert(MBB); | |
596 | goto queue_preds; | |
597 | } else | |
598 | --I; | |
599 | ||
600 | check_block: | |
601 | Visited.insert(MBB); | |
602 | if (I == MBB->end()) | |
603 | goto queue_preds; | |
604 | ||
605 | CheckPreds = true; | |
606 | for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { | |
607 | unsigned Opc = I->getOpcode(); | |
608 | if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { | |
609 | CheckPreds = false; | |
610 | break; | |
223e47cc LB |
611 | } |
612 | ||
1a4d82fc JJ |
613 | if (I != BI && clobbersCTR(I)) { |
614 | DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << | |
615 | MBB->getFullName() << ") instruction " << *I << | |
616 | " clobbers CTR, invalidating " << "BB#" << | |
617 | BI->getParent()->getNumber() << " (" << | |
618 | BI->getParent()->getFullName() << ") instruction " << | |
619 | *BI << "\n"); | |
620 | return false; | |
223e47cc | 621 | } |
223e47cc | 622 | |
1a4d82fc JJ |
623 | if (I == IE) |
624 | break; | |
223e47cc LB |
625 | } |
626 | ||
1a4d82fc JJ |
627 | if (!CheckPreds && Preds.empty()) |
628 | return true; | |
223e47cc | 629 | |
1a4d82fc JJ |
630 | if (CheckPreds) { |
631 | queue_preds: | |
632 | if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { | |
633 | DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << | |
634 | BI->getParent()->getNumber() << " (" << | |
635 | BI->getParent()->getFullName() << ") instruction " << | |
636 | *BI << "\n"); | |
223e47cc LB |
637 | return false; |
638 | } | |
1a4d82fc JJ |
639 | |
640 | for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), | |
641 | PIE = MBB->pred_end(); PI != PIE; ++PI) | |
642 | Preds.push_back(*PI); | |
223e47cc LB |
643 | } |
644 | ||
1a4d82fc JJ |
645 | do { |
646 | MBB = Preds.pop_back_val(); | |
647 | if (!Visited.count(MBB)) { | |
648 | I = MBB->getLastNonDebugInstr(); | |
649 | goto check_block; | |
223e47cc | 650 | } |
1a4d82fc | 651 | } while (!Preds.empty()); |
223e47cc | 652 | |
1a4d82fc JJ |
653 | return true; |
654 | } | |
223e47cc | 655 | |
1a4d82fc JJ |
656 | bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { |
657 | MDT = &getAnalysis<MachineDominatorTree>(); | |
658 | ||
659 | // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before | |
660 | // any other instructions that might clobber the ctr register. | |
661 | for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); | |
662 | I != IE; ++I) { | |
663 | MachineBasicBlock *MBB = I; | |
664 | if (!MDT->isReachableFromEntry(MBB)) | |
665 | continue; | |
666 | ||
667 | for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), | |
668 | MIIE = MBB->end(); MII != MIIE; ++MII) { | |
669 | unsigned Opc = MII->getOpcode(); | |
670 | if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || | |
671 | Opc == PPC::BDZ8 || Opc == PPC::BDZ) | |
672 | if (!verifyCTRBranch(MBB, MII)) | |
673 | llvm_unreachable("Invalid PPC CTR loop!"); | |
674 | } | |
675 | } | |
223e47cc | 676 | |
1a4d82fc | 677 | return false; |
223e47cc | 678 | } |
1a4d82fc | 679 | #endif // NDEBUG |
223e47cc | 680 |