]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This pass identifies loops where we can generate the Hexagon hardware | |
11 | // loop instruction. The hardware loop can perform loop branches with a | |
12 | // zero-cycle overhead. | |
13 | // | |
14 | // The pattern that defines the induction variable can changed depending on | |
15 | // prior optimizations. For example, the IndVarSimplify phase run by 'opt' | |
16 | // normalizes induction variables, and the Loop Strength Reduction pass | |
17 | // run by 'llc' may also make changes to the induction variable. | |
18 | // The pattern detected by this phase is due to running Strength Reduction. | |
19 | // | |
20 | // Criteria for hardware loops: | |
21 | // - Countable loops (w/ ind. var for a trip count) | |
22 | // - Assumes loops are normalized by IndVarSimplify | |
23 | // - Try inner-most loops first | |
24 | // - No nested hardware loops. | |
25 | // - No function calls in loops. | |
26 | // | |
27 | //===----------------------------------------------------------------------===// | |
28 | ||
970d7e83 | 29 | #include "llvm/ADT/SmallSet.h" |
1a4d82fc JJ |
30 | #include "Hexagon.h" |
31 | #include "HexagonTargetMachine.h" | |
223e47cc | 32 | #include "llvm/ADT/Statistic.h" |
223e47cc LB |
33 | #include "llvm/CodeGen/MachineDominators.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" | |
35 | #include "llvm/CodeGen/MachineFunctionPass.h" | |
36 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |
37 | #include "llvm/CodeGen/MachineLoopInfo.h" | |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
970d7e83 LB |
39 | #include "llvm/PassSupport.h" |
40 | #include "llvm/Support/CommandLine.h" | |
223e47cc LB |
41 | #include "llvm/Support/Debug.h" |
42 | #include "llvm/Support/raw_ostream.h" | |
43 | #include "llvm/Target/TargetInstrInfo.h" | |
44 | #include <algorithm> | |
970d7e83 | 45 | #include <vector> |
223e47cc LB |
46 | |
47 | using namespace llvm; | |
48 | ||
1a4d82fc JJ |
49 | #define DEBUG_TYPE "hwloops" |
50 | ||
970d7e83 LB |
51 | #ifndef NDEBUG |
52 | static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); | |
53 | #endif | |
54 | ||
223e47cc LB |
55 | STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); |
56 | ||
970d7e83 LB |
57 | namespace llvm { |
58 | void initializeHexagonHardwareLoopsPass(PassRegistry&); | |
59 | } | |
60 | ||
223e47cc LB |
61 | namespace { |
62 | class CountValue; | |
63 | struct HexagonHardwareLoops : public MachineFunctionPass { | |
970d7e83 LB |
64 | MachineLoopInfo *MLI; |
65 | MachineRegisterInfo *MRI; | |
66 | MachineDominatorTree *MDT; | |
67 | const HexagonTargetMachine *TM; | |
68 | const HexagonInstrInfo *TII; | |
69 | const HexagonRegisterInfo *TRI; | |
70 | #ifndef NDEBUG | |
71 | static int Counter; | |
72 | #endif | |
223e47cc LB |
73 | |
74 | public: | |
970d7e83 | 75 | static char ID; |
223e47cc | 76 | |
970d7e83 LB |
77 | HexagonHardwareLoops() : MachineFunctionPass(ID) { |
78 | initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); | |
79 | } | |
223e47cc | 80 | |
1a4d82fc | 81 | bool runOnMachineFunction(MachineFunction &MF) override; |
223e47cc | 82 | |
1a4d82fc | 83 | const char *getPassName() const override { return "Hexagon Hardware Loops"; } |
223e47cc | 84 | |
1a4d82fc | 85 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
223e47cc | 86 | AU.addRequired<MachineDominatorTree>(); |
223e47cc | 87 | AU.addRequired<MachineLoopInfo>(); |
223e47cc LB |
88 | MachineFunctionPass::getAnalysisUsage(AU); |
89 | } | |
90 | ||
91 | private: | |
970d7e83 LB |
92 | /// Kinds of comparisons in the compare instructions. |
93 | struct Comparison { | |
94 | enum Kind { | |
95 | EQ = 0x01, | |
96 | NE = 0x02, | |
97 | L = 0x04, // Less-than property. | |
98 | G = 0x08, // Greater-than property. | |
99 | U = 0x40, // Unsigned property. | |
100 | LTs = L, | |
101 | LEs = L | EQ, | |
102 | GTs = G, | |
103 | GEs = G | EQ, | |
104 | LTu = L | U, | |
105 | LEu = L | EQ | U, | |
106 | GTu = G | U, | |
107 | GEu = G | EQ | U | |
108 | }; | |
109 | ||
110 | static Kind getSwappedComparison(Kind Cmp) { | |
111 | assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); | |
112 | if ((Cmp & L) || (Cmp & G)) | |
113 | return (Kind)(Cmp ^ (L|G)); | |
114 | return Cmp; | |
115 | } | |
116 | }; | |
223e47cc | 117 | |
970d7e83 LB |
118 | /// \brief Find the register that contains the loop controlling |
119 | /// induction variable. | |
120 | /// If successful, it will return true and set the \p Reg, \p IVBump | |
121 | /// and \p IVOp arguments. Otherwise it will return false. | |
122 | /// The returned induction register is the register R that follows the | |
123 | /// following induction pattern: | |
124 | /// loop: | |
125 | /// R = phi ..., [ R.next, LatchBlock ] | |
126 | /// R.next = R + #bump | |
127 | /// if (R.next < #N) goto loop | |
128 | /// IVBump is the immediate value added to R, and IVOp is the instruction | |
129 | /// "R.next = R + #bump". | |
130 | bool findInductionRegister(MachineLoop *L, unsigned &Reg, | |
131 | int64_t &IVBump, MachineInstr *&IVOp) const; | |
132 | ||
133 | /// \brief Analyze the statements in a loop to determine if the loop | |
134 | /// has a computable trip count and, if so, return a value that represents | |
135 | /// the trip count expression. | |
136 | CountValue *getLoopTripCount(MachineLoop *L, | |
1a4d82fc | 137 | SmallVectorImpl<MachineInstr *> &OldInsts); |
970d7e83 LB |
138 | |
139 | /// \brief Return the expression that represents the number of times | |
140 | /// a loop iterates. The function takes the operands that represent the | |
141 | /// loop start value, loop end value, and induction value. Based upon | |
142 | /// these operands, the function attempts to compute the trip count. | |
143 | /// If the trip count is not directly available (as an immediate value, | |
144 | /// or a register), the function will attempt to insert computation of it | |
145 | /// to the loop's preheader. | |
146 | CountValue *computeCount(MachineLoop *Loop, | |
147 | const MachineOperand *Start, | |
148 | const MachineOperand *End, | |
149 | unsigned IVReg, | |
150 | int64_t IVBump, | |
151 | Comparison::Kind Cmp) const; | |
152 | ||
153 | /// \brief Return true if the instruction is not valid within a hardware | |
154 | /// loop. | |
223e47cc LB |
155 | bool isInvalidLoopOperation(const MachineInstr *MI) const; |
156 | ||
970d7e83 LB |
157 | /// \brief Return true if the loop contains an instruction that inhibits |
158 | /// using the hardware loop. | |
223e47cc LB |
159 | bool containsInvalidInstruction(MachineLoop *L) const; |
160 | ||
970d7e83 LB |
161 | /// \brief Given a loop, check if we can convert it to a hardware loop. |
162 | /// If so, then perform the conversion and return true. | |
223e47cc LB |
163 | bool convertToHardwareLoop(MachineLoop *L); |
164 | ||
970d7e83 LB |
165 | /// \brief Return true if the instruction is now dead. |
166 | bool isDead(const MachineInstr *MI, | |
1a4d82fc | 167 | SmallVectorImpl<MachineInstr *> &DeadPhis) const; |
970d7e83 LB |
168 | |
169 | /// \brief Remove the instruction if it is now dead. | |
170 | void removeIfDead(MachineInstr *MI); | |
171 | ||
172 | /// \brief Make sure that the "bump" instruction executes before the | |
173 | /// compare. We need that for the IV fixup, so that the compare | |
174 | /// instruction would not use a bumped value that has not yet been | |
175 | /// defined. If the instructions are out of order, try to reorder them. | |
176 | bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); | |
177 | ||
178 | /// \brief Get the instruction that loads an immediate value into \p R, | |
179 | /// or 0 if such an instruction does not exist. | |
180 | MachineInstr *defWithImmediate(unsigned R); | |
181 | ||
182 | /// \brief Get the immediate value referenced to by \p MO, either for | |
183 | /// immediate operands, or for register operands, where the register | |
184 | /// was defined with an immediate value. | |
185 | int64_t getImmediate(MachineOperand &MO); | |
186 | ||
187 | /// \brief Reset the given machine operand to now refer to a new immediate | |
188 | /// value. Assumes that the operand was already referencing an immediate | |
189 | /// value, either directly, or via a register. | |
190 | void setImmediate(MachineOperand &MO, int64_t Val); | |
191 | ||
192 | /// \brief Fix the data flow of the induction varible. | |
193 | /// The desired flow is: phi ---> bump -+-> comparison-in-latch. | |
194 | /// | | |
195 | /// +-> back to phi | |
196 | /// where "bump" is the increment of the induction variable: | |
197 | /// iv = iv + #const. | |
198 | /// Due to some prior code transformations, the actual flow may look | |
199 | /// like this: | |
200 | /// phi -+-> bump ---> back to phi | |
201 | /// | | |
202 | /// +-> comparison-in-latch (against upper_bound-bump), | |
203 | /// i.e. the comparison that controls the loop execution may be using | |
204 | /// the value of the induction variable from before the increment. | |
205 | /// | |
206 | /// Return true if the loop's flow is the desired one (i.e. it's | |
207 | /// either been fixed, or no fixing was necessary). | |
208 | /// Otherwise, return false. This can happen if the induction variable | |
209 | /// couldn't be identified, or if the value in the latch's comparison | |
210 | /// cannot be adjusted to reflect the post-bump value. | |
211 | bool fixupInductionVariable(MachineLoop *L); | |
212 | ||
213 | /// \brief Given a loop, if it does not have a preheader, create one. | |
214 | /// Return the block that is the preheader. | |
215 | MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); | |
223e47cc LB |
216 | }; |
217 | ||
218 | char HexagonHardwareLoops::ID = 0; | |
970d7e83 LB |
219 | #ifndef NDEBUG |
220 | int HexagonHardwareLoops::Counter = 0; | |
221 | #endif | |
223e47cc | 222 | |
1a4d82fc | 223 | /// \brief Abstraction for a trip count of a loop. A smaller version |
970d7e83 LB |
224 | /// of the MachineOperand class without the concerns of changing the |
225 | /// operand representation. | |
223e47cc LB |
226 | class CountValue { |
227 | public: | |
228 | enum CountValueType { | |
229 | CV_Register, | |
230 | CV_Immediate | |
231 | }; | |
232 | private: | |
233 | CountValueType Kind; | |
234 | union Values { | |
970d7e83 LB |
235 | struct { |
236 | unsigned Reg; | |
237 | unsigned Sub; | |
238 | } R; | |
239 | unsigned ImmVal; | |
223e47cc | 240 | } Contents; |
223e47cc LB |
241 | |
242 | public: | |
970d7e83 LB |
243 | explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { |
244 | Kind = t; | |
245 | if (Kind == CV_Register) { | |
246 | Contents.R.Reg = v; | |
247 | Contents.R.Sub = u; | |
248 | } else { | |
249 | Contents.ImmVal = v; | |
250 | } | |
251 | } | |
223e47cc LB |
252 | bool isReg() const { return Kind == CV_Register; } |
253 | bool isImm() const { return Kind == CV_Immediate; } | |
223e47cc LB |
254 | |
255 | unsigned getReg() const { | |
256 | assert(isReg() && "Wrong CountValue accessor"); | |
970d7e83 | 257 | return Contents.R.Reg; |
223e47cc | 258 | } |
970d7e83 LB |
259 | unsigned getSubReg() const { |
260 | assert(isReg() && "Wrong CountValue accessor"); | |
261 | return Contents.R.Sub; | |
223e47cc | 262 | } |
970d7e83 | 263 | unsigned getImm() const { |
223e47cc | 264 | assert(isImm() && "Wrong CountValue accessor"); |
223e47cc LB |
265 | return Contents.ImmVal; |
266 | } | |
223e47cc | 267 | |
1a4d82fc JJ |
268 | void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const { |
269 | const TargetRegisterInfo *TRI = | |
270 | TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; | |
970d7e83 LB |
271 | if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } |
272 | if (isImm()) { OS << Contents.ImmVal; } | |
223e47cc | 273 | } |
223e47cc | 274 | }; |
970d7e83 | 275 | } // end anonymous namespace |
223e47cc | 276 | |
223e47cc | 277 | |
970d7e83 LB |
278 | INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", |
279 | "Hexagon Hardware Loops", false, false) | |
280 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) | |
281 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) | |
282 | INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", | |
283 | "Hexagon Hardware Loops", false, false) | |
223e47cc LB |
284 | |
285 | ||
970d7e83 | 286 | /// \brief Returns true if the instruction is a hardware loop instruction. |
223e47cc | 287 | static bool isHardwareLoop(const MachineInstr *MI) { |
85aaf69f SL |
288 | return MI->getOpcode() == Hexagon::J2_loop0r || |
289 | MI->getOpcode() == Hexagon::J2_loop0i; | |
223e47cc LB |
290 | } |
291 | ||
223e47cc LB |
292 | FunctionPass *llvm::createHexagonHardwareLoops() { |
293 | return new HexagonHardwareLoops(); | |
294 | } | |
295 | ||
296 | ||
297 | bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { | |
298 | DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); | |
299 | ||
300 | bool Changed = false; | |
301 | ||
223e47cc | 302 | MLI = &getAnalysis<MachineLoopInfo>(); |
223e47cc | 303 | MRI = &MF.getRegInfo(); |
970d7e83 LB |
304 | MDT = &getAnalysis<MachineDominatorTree>(); |
305 | TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget()); | |
1a4d82fc JJ |
306 | TII = static_cast<const HexagonInstrInfo *>( |
307 | TM->getSubtargetImpl()->getInstrInfo()); | |
308 | TRI = static_cast<const HexagonRegisterInfo *>( | |
309 | TM->getSubtargetImpl()->getRegisterInfo()); | |
223e47cc LB |
310 | |
311 | for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); | |
312 | I != E; ++I) { | |
313 | MachineLoop *L = *I; | |
970d7e83 | 314 | if (!L->getParentLoop()) |
223e47cc | 315 | Changed |= convertToHardwareLoop(L); |
223e47cc LB |
316 | } |
317 | ||
318 | return Changed; | |
319 | } | |
320 | ||
970d7e83 LB |
321 | |
322 | bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, | |
323 | unsigned &Reg, | |
324 | int64_t &IVBump, | |
325 | MachineInstr *&IVOp | |
326 | ) const { | |
327 | MachineBasicBlock *Header = L->getHeader(); | |
328 | MachineBasicBlock *Preheader = L->getLoopPreheader(); | |
329 | MachineBasicBlock *Latch = L->getLoopLatch(); | |
330 | if (!Header || !Preheader || !Latch) | |
331 | return false; | |
332 | ||
333 | // This pair represents an induction register together with an immediate | |
334 | // value that will be added to it in each loop iteration. | |
335 | typedef std::pair<unsigned,int64_t> RegisterBump; | |
336 | ||
337 | // Mapping: R.next -> (R, bump), where R, R.next and bump are derived | |
338 | // from an induction operation | |
339 | // R.next = R + bump | |
340 | // where bump is an immediate value. | |
341 | typedef std::map<unsigned,RegisterBump> InductionMap; | |
342 | ||
343 | InductionMap IndMap; | |
344 | ||
345 | typedef MachineBasicBlock::instr_iterator instr_iterator; | |
346 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |
347 | I != E && I->isPHI(); ++I) { | |
348 | MachineInstr *Phi = &*I; | |
349 | ||
350 | // Have a PHI instruction. Get the operand that corresponds to the | |
351 | // latch block, and see if is a result of an addition of form "reg+imm", | |
352 | // where the "reg" is defined by the PHI node we are looking at. | |
353 | for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { | |
354 | if (Phi->getOperand(i+1).getMBB() != Latch) | |
355 | continue; | |
356 | ||
357 | unsigned PhiOpReg = Phi->getOperand(i).getReg(); | |
358 | MachineInstr *DI = MRI->getVRegDef(PhiOpReg); | |
359 | unsigned UpdOpc = DI->getOpcode(); | |
360 | bool isAdd = (UpdOpc == Hexagon::ADD_ri); | |
361 | ||
362 | if (isAdd) { | |
363 | // If the register operand to the add is the PHI we're | |
364 | // looking at, this meets the induction pattern. | |
365 | unsigned IndReg = DI->getOperand(1).getReg(); | |
366 | if (MRI->getVRegDef(IndReg) == Phi) { | |
367 | unsigned UpdReg = DI->getOperand(0).getReg(); | |
368 | int64_t V = DI->getOperand(2).getImm(); | |
369 | IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); | |
370 | } | |
371 | } | |
372 | } // for (i) | |
373 | } // for (instr) | |
374 | ||
375 | SmallVector<MachineOperand,2> Cond; | |
1a4d82fc | 376 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
970d7e83 LB |
377 | bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); |
378 | if (NotAnalyzed) | |
379 | return false; | |
380 | ||
381 | unsigned CSz = Cond.size(); | |
382 | assert (CSz == 1 || CSz == 2); | |
383 | unsigned PredR = Cond[CSz-1].getReg(); | |
384 | ||
385 | MachineInstr *PredI = MRI->getVRegDef(PredR); | |
386 | if (!PredI->isCompare()) | |
387 | return false; | |
388 | ||
389 | unsigned CmpReg1 = 0, CmpReg2 = 0; | |
390 | int CmpImm = 0, CmpMask = 0; | |
391 | bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, | |
392 | CmpMask, CmpImm); | |
393 | // Fail if the compare was not analyzed, or it's not comparing a register | |
394 | // with an immediate value. Not checking the mask here, since we handle | |
395 | // the individual compare opcodes (including CMPb) later on. | |
396 | if (!CmpAnalyzed) | |
397 | return false; | |
398 | ||
399 | // Exactly one of the input registers to the comparison should be among | |
400 | // the induction registers. | |
401 | InductionMap::iterator IndMapEnd = IndMap.end(); | |
402 | InductionMap::iterator F = IndMapEnd; | |
403 | if (CmpReg1 != 0) { | |
404 | InductionMap::iterator F1 = IndMap.find(CmpReg1); | |
405 | if (F1 != IndMapEnd) | |
406 | F = F1; | |
407 | } | |
408 | if (CmpReg2 != 0) { | |
409 | InductionMap::iterator F2 = IndMap.find(CmpReg2); | |
410 | if (F2 != IndMapEnd) { | |
411 | if (F != IndMapEnd) | |
412 | return false; | |
413 | F = F2; | |
414 | } | |
415 | } | |
416 | if (F == IndMapEnd) | |
417 | return false; | |
418 | ||
419 | Reg = F->second.first; | |
420 | IVBump = F->second.second; | |
421 | IVOp = MRI->getVRegDef(F->first); | |
422 | return true; | |
423 | } | |
424 | ||
425 | ||
426 | /// \brief Analyze the statements in a loop to determine if the loop has | |
427 | /// a computable trip count and, if so, return a value that represents | |
428 | /// the trip count expression. | |
223e47cc | 429 | /// |
970d7e83 LB |
430 | /// This function iterates over the phi nodes in the loop to check for |
431 | /// induction variable patterns that are used in the calculation for | |
432 | /// the number of time the loop is executed. | |
433 | CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, | |
1a4d82fc | 434 | SmallVectorImpl<MachineInstr *> &OldInsts) { |
223e47cc LB |
435 | MachineBasicBlock *TopMBB = L->getTopBlock(); |
436 | MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); | |
437 | assert(PI != TopMBB->pred_end() && | |
438 | "Loop must have more than one incoming edge!"); | |
439 | MachineBasicBlock *Backedge = *PI++; | |
970d7e83 | 440 | if (PI == TopMBB->pred_end()) // dead loop? |
1a4d82fc | 441 | return nullptr; |
223e47cc | 442 | MachineBasicBlock *Incoming = *PI++; |
970d7e83 | 443 | if (PI != TopMBB->pred_end()) // multiple backedges? |
1a4d82fc | 444 | return nullptr; |
223e47cc | 445 | |
970d7e83 | 446 | // Make sure there is one incoming and one backedge and determine which |
223e47cc LB |
447 | // is which. |
448 | if (L->contains(Incoming)) { | |
449 | if (L->contains(Backedge)) | |
1a4d82fc | 450 | return nullptr; |
223e47cc LB |
451 | std::swap(Incoming, Backedge); |
452 | } else if (!L->contains(Backedge)) | |
1a4d82fc | 453 | return nullptr; |
223e47cc | 454 | |
970d7e83 LB |
455 | // Look for the cmp instruction to determine if we can get a useful trip |
456 | // count. The trip count can be either a register or an immediate. The | |
457 | // location of the value depends upon the type (reg or imm). | |
458 | MachineBasicBlock *Latch = L->getLoopLatch(); | |
459 | if (!Latch) | |
1a4d82fc | 460 | return nullptr; |
970d7e83 LB |
461 | |
462 | unsigned IVReg = 0; | |
463 | int64_t IVBump = 0; | |
464 | MachineInstr *IVOp; | |
465 | bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); | |
466 | if (!FoundIV) | |
1a4d82fc | 467 | return nullptr; |
970d7e83 LB |
468 | |
469 | MachineBasicBlock *Preheader = L->getLoopPreheader(); | |
470 | ||
1a4d82fc | 471 | MachineOperand *InitialValue = nullptr; |
970d7e83 LB |
472 | MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); |
473 | for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { | |
474 | MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); | |
475 | if (MBB == Preheader) | |
476 | InitialValue = &IV_Phi->getOperand(i); | |
477 | else if (MBB == Latch) | |
478 | IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. | |
479 | } | |
480 | if (!InitialValue) | |
1a4d82fc | 481 | return nullptr; |
970d7e83 LB |
482 | |
483 | SmallVector<MachineOperand,2> Cond; | |
1a4d82fc | 484 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
970d7e83 LB |
485 | bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); |
486 | if (NotAnalyzed) | |
1a4d82fc | 487 | return nullptr; |
970d7e83 LB |
488 | |
489 | MachineBasicBlock *Header = L->getHeader(); | |
490 | // TB must be non-null. If FB is also non-null, one of them must be | |
491 | // the header. Otherwise, branch to TB could be exiting the loop, and | |
492 | // the fall through can go to the header. | |
493 | assert (TB && "Latch block without a branch?"); | |
494 | assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); | |
495 | if (!TB || (FB && TB != Header && FB != Header)) | |
1a4d82fc | 496 | return nullptr; |
970d7e83 LB |
497 | |
498 | // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch | |
499 | // to put imm(0), followed by P in the vector Cond. | |
500 | // If TB is not the header, it means that the "not-taken" path must lead | |
501 | // to the header. | |
502 | bool Negated = (Cond.size() > 1) ^ (TB != Header); | |
503 | unsigned PredReg = Cond[Cond.size()-1].getReg(); | |
504 | MachineInstr *CondI = MRI->getVRegDef(PredReg); | |
505 | unsigned CondOpc = CondI->getOpcode(); | |
506 | ||
507 | unsigned CmpReg1 = 0, CmpReg2 = 0; | |
508 | int Mask = 0, ImmValue = 0; | |
509 | bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, | |
510 | Mask, ImmValue); | |
511 | if (!AnalyzedCmp) | |
1a4d82fc | 512 | return nullptr; |
970d7e83 LB |
513 | |
514 | // The comparison operator type determines how we compute the loop | |
515 | // trip count. | |
516 | OldInsts.push_back(CondI); | |
517 | OldInsts.push_back(IVOp); | |
518 | ||
519 | // Sadly, the following code gets information based on the position | |
520 | // of the operands in the compare instruction. This has to be done | |
521 | // this way, because the comparisons check for a specific relationship | |
522 | // between the operands (e.g. is-less-than), rather than to find out | |
523 | // what relationship the operands are in (as on PPC). | |
524 | Comparison::Kind Cmp; | |
525 | bool isSwapped = false; | |
526 | const MachineOperand &Op1 = CondI->getOperand(1); | |
527 | const MachineOperand &Op2 = CondI->getOperand(2); | |
1a4d82fc | 528 | const MachineOperand *EndValue = nullptr; |
970d7e83 LB |
529 | |
530 | if (Op1.isReg()) { | |
531 | if (Op2.isImm() || Op1.getReg() == IVReg) | |
532 | EndValue = &Op2; | |
533 | else { | |
534 | EndValue = &Op1; | |
535 | isSwapped = true; | |
223e47cc LB |
536 | } |
537 | } | |
223e47cc | 538 | |
970d7e83 | 539 | if (!EndValue) |
1a4d82fc | 540 | return nullptr; |
970d7e83 LB |
541 | |
542 | switch (CondOpc) { | |
85aaf69f SL |
543 | case Hexagon::C2_cmpeqi: |
544 | case Hexagon::C2_cmpeq: | |
970d7e83 LB |
545 | Cmp = !Negated ? Comparison::EQ : Comparison::NE; |
546 | break; | |
85aaf69f SL |
547 | case Hexagon::C2_cmpgtui: |
548 | case Hexagon::C2_cmpgtu: | |
970d7e83 LB |
549 | Cmp = !Negated ? Comparison::GTu : Comparison::LEu; |
550 | break; | |
85aaf69f SL |
551 | case Hexagon::C2_cmpgti: |
552 | case Hexagon::C2_cmpgt: | |
970d7e83 LB |
553 | Cmp = !Negated ? Comparison::GTs : Comparison::LEs; |
554 | break; | |
555 | // Very limited support for byte/halfword compares. | |
556 | case Hexagon::CMPbEQri_V4: | |
557 | case Hexagon::CMPhEQri_V4: { | |
558 | if (IVBump != 1) | |
1a4d82fc | 559 | return nullptr; |
970d7e83 LB |
560 | |
561 | int64_t InitV, EndV; | |
562 | // Since the comparisons are "ri", the EndValue should be an | |
563 | // immediate. Check it just in case. | |
564 | assert(EndValue->isImm() && "Unrecognized latch comparison"); | |
565 | EndV = EndValue->getImm(); | |
566 | // Allow InitialValue to be a register defined with an immediate. | |
567 | if (InitialValue->isReg()) { | |
568 | if (!defWithImmediate(InitialValue->getReg())) | |
1a4d82fc | 569 | return nullptr; |
970d7e83 | 570 | InitV = getImmediate(*InitialValue); |
223e47cc | 571 | } else { |
970d7e83 LB |
572 | assert(InitialValue->isImm()); |
573 | InitV = InitialValue->getImm(); | |
574 | } | |
575 | if (InitV >= EndV) | |
1a4d82fc | 576 | return nullptr; |
970d7e83 LB |
577 | if (CondOpc == Hexagon::CMPbEQri_V4) { |
578 | if (!isInt<8>(InitV) || !isInt<8>(EndV)) | |
1a4d82fc | 579 | return nullptr; |
970d7e83 LB |
580 | } else { // Hexagon::CMPhEQri_V4 |
581 | if (!isInt<16>(InitV) || !isInt<16>(EndV)) | |
1a4d82fc | 582 | return nullptr; |
223e47cc | 583 | } |
970d7e83 LB |
584 | Cmp = !Negated ? Comparison::EQ : Comparison::NE; |
585 | break; | |
223e47cc | 586 | } |
970d7e83 | 587 | default: |
1a4d82fc | 588 | return nullptr; |
223e47cc | 589 | } |
970d7e83 LB |
590 | |
591 | if (isSwapped) | |
592 | Cmp = Comparison::getSwappedComparison(Cmp); | |
593 | ||
594 | if (InitialValue->isReg()) { | |
595 | unsigned R = InitialValue->getReg(); | |
596 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); | |
597 | if (!MDT->properlyDominates(DefBB, Header)) | |
1a4d82fc | 598 | return nullptr; |
970d7e83 LB |
599 | OldInsts.push_back(MRI->getVRegDef(R)); |
600 | } | |
601 | if (EndValue->isReg()) { | |
602 | unsigned R = EndValue->getReg(); | |
603 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); | |
604 | if (!MDT->properlyDominates(DefBB, Header)) | |
1a4d82fc | 605 | return nullptr; |
970d7e83 LB |
606 | } |
607 | ||
608 | return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); | |
223e47cc LB |
609 | } |
610 | ||
970d7e83 LB |
611 | /// \brief Helper function that returns the expression that represents the |
612 | /// number of times a loop iterates. The function takes the operands that | |
613 | /// represent the loop start value, loop end value, and induction value. | |
614 | /// Based upon these operands, the function attempts to compute the trip count. | |
615 | CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, | |
616 | const MachineOperand *Start, | |
617 | const MachineOperand *End, | |
618 | unsigned IVReg, | |
619 | int64_t IVBump, | |
620 | Comparison::Kind Cmp) const { | |
621 | // Cannot handle comparison EQ, i.e. while (A == B). | |
622 | if (Cmp == Comparison::EQ) | |
1a4d82fc | 623 | return nullptr; |
970d7e83 LB |
624 | |
625 | // Check if either the start or end values are an assignment of an immediate. | |
626 | // If so, use the immediate value rather than the register. | |
627 | if (Start->isReg()) { | |
628 | const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); | |
85aaf69f | 629 | if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi) |
970d7e83 LB |
630 | Start = &StartValInstr->getOperand(1); |
631 | } | |
632 | if (End->isReg()) { | |
633 | const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); | |
85aaf69f | 634 | if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi) |
970d7e83 LB |
635 | End = &EndValInstr->getOperand(1); |
636 | } | |
637 | ||
638 | assert (Start->isReg() || Start->isImm()); | |
639 | assert (End->isReg() || End->isImm()); | |
640 | ||
641 | bool CmpLess = Cmp & Comparison::L; | |
642 | bool CmpGreater = Cmp & Comparison::G; | |
643 | bool CmpHasEqual = Cmp & Comparison::EQ; | |
644 | ||
645 | // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. | |
646 | // If loop executes while iv is "less" with the iv value going down, then | |
647 | // the iv must wrap. | |
648 | if (CmpLess && IVBump < 0) | |
1a4d82fc | 649 | return nullptr; |
970d7e83 LB |
650 | // If loop executes while iv is "greater" with the iv value going up, then |
651 | // the iv must wrap. | |
652 | if (CmpGreater && IVBump > 0) | |
1a4d82fc | 653 | return nullptr; |
970d7e83 LB |
654 | |
655 | if (Start->isImm() && End->isImm()) { | |
656 | // Both, start and end are immediates. | |
657 | int64_t StartV = Start->getImm(); | |
658 | int64_t EndV = End->getImm(); | |
659 | int64_t Dist = EndV - StartV; | |
660 | if (Dist == 0) | |
1a4d82fc | 661 | return nullptr; |
970d7e83 LB |
662 | |
663 | bool Exact = (Dist % IVBump) == 0; | |
664 | ||
665 | if (Cmp == Comparison::NE) { | |
666 | if (!Exact) | |
1a4d82fc | 667 | return nullptr; |
970d7e83 | 668 | if ((Dist < 0) ^ (IVBump < 0)) |
1a4d82fc | 669 | return nullptr; |
970d7e83 LB |
670 | } |
671 | ||
672 | // For comparisons that include the final value (i.e. include equality | |
673 | // with the final value), we need to increase the distance by 1. | |
674 | if (CmpHasEqual) | |
675 | Dist = Dist > 0 ? Dist+1 : Dist-1; | |
676 | ||
677 | // assert (CmpLess => Dist > 0); | |
678 | assert ((!CmpLess || Dist > 0) && "Loop should never iterate!"); | |
679 | // assert (CmpGreater => Dist < 0); | |
680 | assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!"); | |
681 | ||
682 | // "Normalized" distance, i.e. with the bump set to +-1. | |
683 | int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump | |
684 | : (-Dist + (-IVBump-1)) / (-IVBump); | |
685 | assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); | |
686 | ||
687 | uint64_t Count = Dist1; | |
688 | ||
689 | if (Count > 0xFFFFFFFFULL) | |
1a4d82fc | 690 | return nullptr; |
970d7e83 LB |
691 | |
692 | return new CountValue(CountValue::CV_Immediate, Count); | |
693 | } | |
694 | ||
695 | // A general case: Start and End are some values, but the actual | |
696 | // iteration count may not be available. If it is not, insert | |
697 | // a computation of it into the preheader. | |
698 | ||
699 | // If the induction variable bump is not a power of 2, quit. | |
700 | // Othwerise we'd need a general integer division. | |
1a4d82fc JJ |
701 | if (!isPowerOf2_64(abs64(IVBump))) |
702 | return nullptr; | |
970d7e83 LB |
703 | |
704 | MachineBasicBlock *PH = Loop->getLoopPreheader(); | |
705 | assert (PH && "Should have a preheader by now"); | |
706 | MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); | |
707 | DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc() | |
708 | : DebugLoc(); | |
709 | ||
710 | // If Start is an immediate and End is a register, the trip count | |
711 | // will be "reg - imm". Hexagon's "subtract immediate" instruction | |
712 | // is actually "reg + -imm". | |
713 | ||
714 | // If the loop IV is going downwards, i.e. if the bump is negative, | |
715 | // then the iteration count (computed as End-Start) will need to be | |
716 | // negated. To avoid the negation, just swap Start and End. | |
717 | if (IVBump < 0) { | |
718 | std::swap(Start, End); | |
719 | IVBump = -IVBump; | |
720 | } | |
721 | // Cmp may now have a wrong direction, e.g. LEs may now be GEs. | |
722 | // Signedness, and "including equality" are preserved. | |
723 | ||
724 | bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) | |
725 | bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) | |
726 | ||
727 | int64_t StartV = 0, EndV = 0; | |
728 | if (Start->isImm()) | |
729 | StartV = Start->getImm(); | |
730 | if (End->isImm()) | |
731 | EndV = End->getImm(); | |
732 | ||
733 | int64_t AdjV = 0; | |
734 | // To compute the iteration count, we would need this computation: | |
735 | // Count = (End - Start + (IVBump-1)) / IVBump | |
736 | // or, when CmpHasEqual: | |
737 | // Count = (End - Start + (IVBump-1)+1) / IVBump | |
738 | // The "IVBump-1" part is the adjustment (AdjV). We can avoid | |
739 | // generating an instruction specifically to add it if we can adjust | |
740 | // the immediate values for Start or End. | |
741 | ||
742 | if (CmpHasEqual) { | |
743 | // Need to add 1 to the total iteration count. | |
744 | if (Start->isImm()) | |
745 | StartV--; | |
746 | else if (End->isImm()) | |
747 | EndV++; | |
748 | else | |
749 | AdjV += 1; | |
750 | } | |
751 | ||
752 | if (Cmp != Comparison::NE) { | |
753 | if (Start->isImm()) | |
754 | StartV -= (IVBump-1); | |
755 | else if (End->isImm()) | |
756 | EndV += (IVBump-1); | |
757 | else | |
758 | AdjV += (IVBump-1); | |
759 | } | |
760 | ||
761 | unsigned R = 0, SR = 0; | |
762 | if (Start->isReg()) { | |
763 | R = Start->getReg(); | |
764 | SR = Start->getSubReg(); | |
765 | } else { | |
766 | R = End->getReg(); | |
767 | SR = End->getSubReg(); | |
768 | } | |
769 | const TargetRegisterClass *RC = MRI->getRegClass(R); | |
770 | // Hardware loops cannot handle 64-bit registers. If it's a double | |
771 | // register, it has to have a subregister. | |
772 | if (!SR && RC == &Hexagon::DoubleRegsRegClass) | |
1a4d82fc | 773 | return nullptr; |
970d7e83 LB |
774 | const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; |
775 | ||
776 | // Compute DistR (register with the distance between Start and End). | |
777 | unsigned DistR, DistSR; | |
778 | ||
779 | // Avoid special case, where the start value is an imm(0). | |
780 | if (Start->isImm() && StartV == 0) { | |
781 | DistR = End->getReg(); | |
782 | DistSR = End->getSubReg(); | |
783 | } else { | |
85aaf69f | 784 | const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : |
970d7e83 LB |
785 | (RegToImm ? TII->get(Hexagon::SUB_ri) : |
786 | TII->get(Hexagon::ADD_ri)); | |
787 | unsigned SubR = MRI->createVirtualRegister(IntRC); | |
788 | MachineInstrBuilder SubIB = | |
789 | BuildMI(*PH, InsertPos, DL, SubD, SubR); | |
790 | ||
791 | if (RegToReg) { | |
792 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) | |
793 | .addReg(Start->getReg(), 0, Start->getSubReg()); | |
794 | } else if (RegToImm) { | |
795 | SubIB.addImm(EndV) | |
796 | .addReg(Start->getReg(), 0, Start->getSubReg()); | |
797 | } else { // ImmToReg | |
798 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) | |
799 | .addImm(-StartV); | |
800 | } | |
801 | DistR = SubR; | |
802 | DistSR = 0; | |
803 | } | |
804 | ||
805 | // From DistR, compute AdjR (register with the adjusted distance). | |
806 | unsigned AdjR, AdjSR; | |
807 | ||
808 | if (AdjV == 0) { | |
809 | AdjR = DistR; | |
810 | AdjSR = DistSR; | |
811 | } else { | |
812 | // Generate CountR = ADD DistR, AdjVal | |
813 | unsigned AddR = MRI->createVirtualRegister(IntRC); | |
814 | const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); | |
815 | BuildMI(*PH, InsertPos, DL, AddD, AddR) | |
816 | .addReg(DistR, 0, DistSR) | |
817 | .addImm(AdjV); | |
818 | ||
819 | AdjR = AddR; | |
820 | AdjSR = 0; | |
821 | } | |
822 | ||
823 | // From AdjR, compute CountR (register with the final count). | |
824 | unsigned CountR, CountSR; | |
825 | ||
826 | if (IVBump == 1) { | |
827 | CountR = AdjR; | |
828 | CountSR = AdjSR; | |
829 | } else { | |
830 | // The IV bump is a power of two. Log_2(IV bump) is the shift amount. | |
831 | unsigned Shift = Log2_32(IVBump); | |
832 | ||
833 | // Generate NormR = LSR DistR, Shift. | |
834 | unsigned LsrR = MRI->createVirtualRegister(IntRC); | |
85aaf69f | 835 | const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); |
970d7e83 LB |
836 | BuildMI(*PH, InsertPos, DL, LsrD, LsrR) |
837 | .addReg(AdjR, 0, AdjSR) | |
838 | .addImm(Shift); | |
839 | ||
840 | CountR = LsrR; | |
841 | CountSR = 0; | |
842 | } | |
843 | ||
844 | return new CountValue(CountValue::CV_Register, CountR, CountSR); | |
223e47cc LB |
845 | } |
846 | ||
970d7e83 LB |
847 | |
848 | /// \brief Return true if the operation is invalid within hardware loop. | |
849 | bool HexagonHardwareLoops::isInvalidLoopOperation( | |
850 | const MachineInstr *MI) const { | |
223e47cc LB |
851 | |
852 | // call is not allowed because the callee may use a hardware loop | |
970d7e83 | 853 | if (MI->getDesc().isCall()) |
223e47cc | 854 | return true; |
970d7e83 | 855 | |
223e47cc | 856 | // do not allow nested hardware loops |
970d7e83 | 857 | if (isHardwareLoop(MI)) |
223e47cc | 858 | return true; |
970d7e83 | 859 | |
223e47cc LB |
860 | // check if the instruction defines a hardware loop register |
861 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
862 | const MachineOperand &MO = MI->getOperand(i); | |
970d7e83 LB |
863 | if (!MO.isReg() || !MO.isDef()) |
864 | continue; | |
865 | unsigned R = MO.getReg(); | |
866 | if (R == Hexagon::LC0 || R == Hexagon::LC1 || | |
867 | R == Hexagon::SA0 || R == Hexagon::SA1) | |
223e47cc | 868 | return true; |
223e47cc LB |
869 | } |
870 | return false; | |
871 | } | |
872 | ||
970d7e83 LB |
873 | |
874 | /// \brief - Return true if the loop contains an instruction that inhibits | |
875 | /// the use of the hardware loop function. | |
223e47cc | 876 | bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { |
1a4d82fc | 877 | const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); |
223e47cc LB |
878 | for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { |
879 | MachineBasicBlock *MBB = Blocks[i]; | |
880 | for (MachineBasicBlock::iterator | |
881 | MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { | |
882 | const MachineInstr *MI = &*MII; | |
970d7e83 | 883 | if (isInvalidLoopOperation(MI)) |
223e47cc | 884 | return true; |
223e47cc LB |
885 | } |
886 | } | |
887 | return false; | |
888 | } | |
889 | ||
970d7e83 LB |
890 | |
891 | /// \brief Returns true if the instruction is dead. This was essentially | |
892 | /// copied from DeadMachineInstructionElim::isDead, but with special cases | |
893 | /// for inline asm, physical registers and instructions with side effects | |
894 | /// removed. | |
895 | bool HexagonHardwareLoops::isDead(const MachineInstr *MI, | |
1a4d82fc | 896 | SmallVectorImpl<MachineInstr *> &DeadPhis) const { |
970d7e83 LB |
897 | // Examine each operand. |
898 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
899 | const MachineOperand &MO = MI->getOperand(i); | |
900 | if (!MO.isReg() || !MO.isDef()) | |
901 | continue; | |
902 | ||
903 | unsigned Reg = MO.getReg(); | |
904 | if (MRI->use_nodbg_empty(Reg)) | |
905 | continue; | |
906 | ||
907 | typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; | |
908 | ||
909 | // This instruction has users, but if the only user is the phi node for the | |
910 | // parent block, and the only use of that phi node is this instruction, then | |
911 | // this instruction is dead: both it (and the phi node) can be removed. | |
912 | use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); | |
913 | use_nodbg_iterator End = MRI->use_nodbg_end(); | |
1a4d82fc | 914 | if (std::next(I) != End || !I->getParent()->isPHI()) |
970d7e83 LB |
915 | return false; |
916 | ||
1a4d82fc | 917 | MachineInstr *OnePhi = I->getParent(); |
970d7e83 LB |
918 | for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { |
919 | const MachineOperand &OPO = OnePhi->getOperand(j); | |
920 | if (!OPO.isReg() || !OPO.isDef()) | |
921 | continue; | |
922 | ||
923 | unsigned OPReg = OPO.getReg(); | |
924 | use_nodbg_iterator nextJ; | |
925 | for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); | |
926 | J != End; J = nextJ) { | |
1a4d82fc JJ |
927 | nextJ = std::next(J); |
928 | MachineOperand &Use = *J; | |
970d7e83 LB |
929 | MachineInstr *UseMI = Use.getParent(); |
930 | ||
931 | // If the phi node has a user that is not MI, bail... | |
932 | if (MI != UseMI) | |
933 | return false; | |
934 | } | |
935 | } | |
936 | DeadPhis.push_back(OnePhi); | |
937 | } | |
938 | ||
939 | // If there are no defs with uses, the instruction is dead. | |
940 | return true; | |
941 | } | |
942 | ||
943 | void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { | |
944 | // This procedure was essentially copied from DeadMachineInstructionElim. | |
945 | ||
946 | SmallVector<MachineInstr*, 1> DeadPhis; | |
947 | if (isDead(MI, DeadPhis)) { | |
948 | DEBUG(dbgs() << "HW looping will remove: " << *MI); | |
949 | ||
950 | // It is possible that some DBG_VALUE instructions refer to this | |
951 | // instruction. Examine each def operand for such references; | |
952 | // if found, mark the DBG_VALUE as undef (but don't delete it). | |
953 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
954 | const MachineOperand &MO = MI->getOperand(i); | |
955 | if (!MO.isReg() || !MO.isDef()) | |
956 | continue; | |
957 | unsigned Reg = MO.getReg(); | |
958 | MachineRegisterInfo::use_iterator nextI; | |
959 | for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), | |
960 | E = MRI->use_end(); I != E; I = nextI) { | |
1a4d82fc JJ |
961 | nextI = std::next(I); // I is invalidated by the setReg |
962 | MachineOperand &Use = *I; | |
963 | MachineInstr *UseMI = I->getParent(); | |
970d7e83 LB |
964 | if (UseMI == MI) |
965 | continue; | |
966 | if (Use.isDebug()) | |
967 | UseMI->getOperand(0).setReg(0U); | |
968 | // This may also be a "instr -> phi -> instr" case which can | |
969 | // be removed too. | |
970 | } | |
971 | } | |
972 | ||
973 | MI->eraseFromParent(); | |
974 | for (unsigned i = 0; i < DeadPhis.size(); ++i) | |
975 | DeadPhis[i]->eraseFromParent(); | |
976 | } | |
977 | } | |
978 | ||
979 | /// \brief Check if the loop is a candidate for converting to a hardware | |
980 | /// loop. If so, then perform the transformation. | |
223e47cc | 981 | /// |
970d7e83 LB |
982 | /// This function works on innermost loops first. A loop can be converted |
983 | /// if it is a counting loop; either a register value or an immediate. | |
223e47cc | 984 | /// |
970d7e83 LB |
985 | /// The code makes several assumptions about the representation of the loop |
986 | /// in llvm. | |
223e47cc | 987 | bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { |
970d7e83 LB |
988 | // This is just for sanity. |
989 | assert(L->getHeader() && "Loop without a header?"); | |
990 | ||
223e47cc LB |
991 | bool Changed = false; |
992 | // Process nested loops first. | |
970d7e83 | 993 | for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) |
223e47cc | 994 | Changed |= convertToHardwareLoop(*I); |
970d7e83 | 995 | |
223e47cc | 996 | // If a nested loop has been converted, then we can't convert this loop. |
970d7e83 | 997 | if (Changed) |
223e47cc | 998 | return Changed; |
970d7e83 LB |
999 | |
1000 | #ifndef NDEBUG | |
1001 | // Stop trying after reaching the limit (if any). | |
1002 | int Limit = HWLoopLimit; | |
1003 | if (Limit >= 0) { | |
1004 | if (Counter >= HWLoopLimit) | |
1005 | return false; | |
1006 | Counter++; | |
223e47cc | 1007 | } |
970d7e83 LB |
1008 | #endif |
1009 | ||
223e47cc | 1010 | // Does the loop contain any invalid instructions? |
970d7e83 | 1011 | if (containsInvalidInstruction(L)) |
223e47cc | 1012 | return false; |
970d7e83 LB |
1013 | |
1014 | // Is the induction variable bump feeding the latch condition? | |
1015 | if (!fixupInductionVariable(L)) | |
223e47cc | 1016 | return false; |
223e47cc LB |
1017 | |
1018 | MachineBasicBlock *LastMBB = L->getExitingBlock(); | |
1019 | // Don't generate hw loop if the loop has more than one exit. | |
1a4d82fc | 1020 | if (!LastMBB) |
223e47cc | 1021 | return false; |
970d7e83 | 1022 | |
223e47cc | 1023 | MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); |
970d7e83 LB |
1024 | if (LastI == LastMBB->end()) |
1025 | return false; | |
1026 | ||
1027 | // Ensure the loop has a preheader: the loop instruction will be | |
1028 | // placed there. | |
1029 | bool NewPreheader = false; | |
1030 | MachineBasicBlock *Preheader = L->getLoopPreheader(); | |
1031 | if (!Preheader) { | |
1032 | Preheader = createPreheaderForLoop(L); | |
1033 | if (!Preheader) | |
1034 | return false; | |
1035 | NewPreheader = true; | |
1036 | } | |
1037 | MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); | |
1038 | ||
1039 | SmallVector<MachineInstr*, 2> OldInsts; | |
1040 | // Are we able to determine the trip count for the loop? | |
1041 | CountValue *TripCount = getLoopTripCount(L, OldInsts); | |
1a4d82fc | 1042 | if (!TripCount) |
970d7e83 LB |
1043 | return false; |
1044 | ||
1045 | // Is the trip count available in the preheader? | |
1046 | if (TripCount->isReg()) { | |
1047 | // There will be a use of the register inserted into the preheader, | |
1048 | // so make sure that the register is actually defined at that point. | |
1049 | MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); | |
1050 | MachineBasicBlock *BBDef = TCDef->getParent(); | |
1051 | if (!NewPreheader) { | |
1052 | if (!MDT->dominates(BBDef, Preheader)) | |
1053 | return false; | |
1054 | } else { | |
1055 | // If we have just created a preheader, the dominator tree won't be | |
1056 | // aware of it. Check if the definition of the register dominates | |
1057 | // the header, but is not the header itself. | |
1058 | if (!MDT->properlyDominates(BBDef, L->getHeader())) | |
1059 | return false; | |
1060 | } | |
1061 | } | |
223e47cc LB |
1062 | |
1063 | // Determine the loop start. | |
1064 | MachineBasicBlock *LoopStart = L->getTopBlock(); | |
1065 | if (L->getLoopLatch() != LastMBB) { | |
1066 | // When the exit and latch are not the same, use the latch block as the | |
1067 | // start. | |
970d7e83 LB |
1068 | // The loop start address is used only after the 1st iteration, and the |
1069 | // loop latch may contains instrs. that need to be executed after the | |
1070 | // first iteration. | |
223e47cc LB |
1071 | LoopStart = L->getLoopLatch(); |
1072 | // Make sure the latch is a successor of the exit, otherwise it won't work. | |
970d7e83 | 1073 | if (!LastMBB->isSuccessor(LoopStart)) |
223e47cc | 1074 | return false; |
223e47cc LB |
1075 | } |
1076 | ||
970d7e83 | 1077 | // Convert the loop to a hardware loop. |
223e47cc | 1078 | DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); |
970d7e83 LB |
1079 | DebugLoc DL; |
1080 | if (InsertPos != Preheader->end()) | |
1081 | DL = InsertPos->getDebugLoc(); | |
223e47cc LB |
1082 | |
1083 | if (TripCount->isReg()) { | |
1084 | // Create a copy of the loop count register. | |
970d7e83 LB |
1085 | unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); |
1086 | BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) | |
1087 | .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); | |
223e47cc | 1088 | // Add the Loop instruction to the beginning of the loop. |
85aaf69f | 1089 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) |
970d7e83 LB |
1090 | .addMBB(LoopStart) |
1091 | .addReg(CountReg); | |
223e47cc | 1092 | } else { |
970d7e83 LB |
1093 | assert(TripCount->isImm() && "Expecting immediate value for trip count"); |
1094 | // Add the Loop immediate instruction to the beginning of the loop, | |
1095 | // if the immediate fits in the instructions. Otherwise, we need to | |
1096 | // create a new virtual register. | |
223e47cc | 1097 | int64_t CountImm = TripCount->getImm(); |
85aaf69f | 1098 | if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) { |
970d7e83 | 1099 | unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); |
85aaf69f | 1100 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) |
970d7e83 | 1101 | .addImm(CountImm); |
85aaf69f | 1102 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) |
970d7e83 LB |
1103 | .addMBB(LoopStart).addReg(CountReg); |
1104 | } else | |
85aaf69f | 1105 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i)) |
970d7e83 | 1106 | .addMBB(LoopStart).addImm(CountImm); |
223e47cc LB |
1107 | } |
1108 | ||
970d7e83 LB |
1109 | // Make sure the loop start always has a reference in the CFG. We need |
1110 | // to create a BlockAddress operand to get this mechanism to work both the | |
223e47cc LB |
1111 | // MachineBasicBlock and BasicBlock objects need the flag set. |
1112 | LoopStart->setHasAddressTaken(); | |
1113 | // This line is needed to set the hasAddressTaken flag on the BasicBlock | |
970d7e83 | 1114 | // object. |
223e47cc LB |
1115 | BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); |
1116 | ||
1117 | // Replace the loop branch with an endloop instruction. | |
970d7e83 LB |
1118 | DebugLoc LastIDL = LastI->getDebugLoc(); |
1119 | BuildMI(*LastMBB, LastI, LastIDL, | |
1120 | TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); | |
223e47cc LB |
1121 | |
1122 | // The loop ends with either: | |
1123 | // - a conditional branch followed by an unconditional branch, or | |
1124 | // - a conditional branch to the loop start. | |
85aaf69f SL |
1125 | if (LastI->getOpcode() == Hexagon::J2_jumpt || |
1126 | LastI->getOpcode() == Hexagon::J2_jumpf) { | |
970d7e83 | 1127 | // Delete one and change/add an uncond. branch to out of the loop. |
223e47cc LB |
1128 | MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); |
1129 | LastI = LastMBB->erase(LastI); | |
1130 | if (!L->contains(BranchTarget)) { | |
970d7e83 LB |
1131 | if (LastI != LastMBB->end()) |
1132 | LastI = LastMBB->erase(LastI); | |
223e47cc | 1133 | SmallVector<MachineOperand, 0> Cond; |
1a4d82fc | 1134 | TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); |
223e47cc LB |
1135 | } |
1136 | } else { | |
1137 | // Conditional branch to loop start; just delete it. | |
1138 | LastMBB->erase(LastI); | |
1139 | } | |
1140 | delete TripCount; | |
1141 | ||
970d7e83 LB |
1142 | // The induction operation and the comparison may now be |
1143 | // unneeded. If these are unneeded, then remove them. | |
1144 | for (unsigned i = 0; i < OldInsts.size(); ++i) | |
1145 | removeIfDead(OldInsts[i]); | |
1146 | ||
223e47cc LB |
1147 | ++NumHWLoops; |
1148 | return true; | |
1149 | } | |
1150 | ||
970d7e83 LB |
1151 | |
1152 | bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, | |
1153 | MachineInstr *CmpI) { | |
1154 | assert (BumpI != CmpI && "Bump and compare in the same instruction?"); | |
1155 | ||
1156 | MachineBasicBlock *BB = BumpI->getParent(); | |
1157 | if (CmpI->getParent() != BB) | |
1158 | return false; | |
1159 | ||
1160 | typedef MachineBasicBlock::instr_iterator instr_iterator; | |
1161 | // Check if things are in order to begin with. | |
1162 | for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) | |
1163 | if (&*I == CmpI) | |
1164 | return true; | |
1165 | ||
1166 | // Out of order. | |
1167 | unsigned PredR = CmpI->getOperand(0).getReg(); | |
1168 | bool FoundBump = false; | |
1a4d82fc | 1169 | instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt); |
970d7e83 LB |
1170 | for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { |
1171 | MachineInstr *In = &*I; | |
1172 | for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { | |
1173 | MachineOperand &MO = In->getOperand(i); | |
1174 | if (MO.isReg() && MO.isUse()) { | |
1175 | if (MO.getReg() == PredR) // Found an intervening use of PredR. | |
1176 | return false; | |
1177 | } | |
1178 | } | |
1179 | ||
1180 | if (In == BumpI) { | |
1181 | instr_iterator After = BumpI; | |
1182 | instr_iterator From = CmpI; | |
1a4d82fc | 1183 | BB->splice(std::next(After), BB, From); |
970d7e83 LB |
1184 | FoundBump = true; |
1185 | break; | |
1186 | } | |
1187 | } | |
1188 | assert (FoundBump && "Cannot determine instruction order"); | |
1189 | return FoundBump; | |
223e47cc LB |
1190 | } |
1191 | ||
223e47cc | 1192 | |
970d7e83 LB |
1193 | MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { |
1194 | MachineInstr *DI = MRI->getVRegDef(R); | |
1195 | unsigned DOpc = DI->getOpcode(); | |
1196 | switch (DOpc) { | |
85aaf69f SL |
1197 | case Hexagon::A2_tfrsi: |
1198 | case Hexagon::A2_tfrpi: | |
970d7e83 LB |
1199 | case Hexagon::CONST32_Int_Real: |
1200 | case Hexagon::CONST64_Int_Real: | |
1201 | return DI; | |
1202 | } | |
1a4d82fc | 1203 | return nullptr; |
223e47cc LB |
1204 | } |
1205 | ||
970d7e83 LB |
1206 | |
1207 | int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { | |
1208 | if (MO.isImm()) | |
1209 | return MO.getImm(); | |
1210 | assert(MO.isReg()); | |
1211 | unsigned R = MO.getReg(); | |
1212 | MachineInstr *DI = defWithImmediate(R); | |
1213 | assert(DI && "Need an immediate operand"); | |
1214 | // All currently supported "define-with-immediate" instructions have the | |
1215 | // actual immediate value in the operand(1). | |
1216 | int64_t v = DI->getOperand(1).getImm(); | |
1217 | return v; | |
1218 | } | |
1219 | ||
1220 | ||
1221 | void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { | |
1222 | if (MO.isImm()) { | |
1223 | MO.setImm(Val); | |
1224 | return; | |
1225 | } | |
1226 | ||
1227 | assert(MO.isReg()); | |
1228 | unsigned R = MO.getReg(); | |
1229 | MachineInstr *DI = defWithImmediate(R); | |
1230 | if (MRI->hasOneNonDBGUse(R)) { | |
1231 | // If R has only one use, then just change its defining instruction to | |
1232 | // the new immediate value. | |
1233 | DI->getOperand(1).setImm(Val); | |
1234 | return; | |
1235 | } | |
1236 | ||
1237 | const TargetRegisterClass *RC = MRI->getRegClass(R); | |
1238 | unsigned NewR = MRI->createVirtualRegister(RC); | |
1239 | MachineBasicBlock &B = *DI->getParent(); | |
1240 | DebugLoc DL = DI->getDebugLoc(); | |
1241 | BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) | |
1242 | .addImm(Val); | |
1243 | MO.setReg(NewR); | |
1244 | } | |
1245 | ||
1246 | ||
1247 | bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { | |
1248 | MachineBasicBlock *Header = L->getHeader(); | |
1249 | MachineBasicBlock *Preheader = L->getLoopPreheader(); | |
1250 | MachineBasicBlock *Latch = L->getLoopLatch(); | |
1251 | ||
1252 | if (!Header || !Preheader || !Latch) | |
1253 | return false; | |
1254 | ||
1255 | // These data structures follow the same concept as the corresponding | |
1256 | // ones in findInductionRegister (where some comments are). | |
1257 | typedef std::pair<unsigned,int64_t> RegisterBump; | |
1258 | typedef std::pair<unsigned,RegisterBump> RegisterInduction; | |
1259 | typedef std::set<RegisterInduction> RegisterInductionSet; | |
1260 | ||
1261 | // Register candidates for induction variables, with their associated bumps. | |
1262 | RegisterInductionSet IndRegs; | |
1263 | ||
1264 | // Look for induction patterns: | |
1265 | // vreg1 = PHI ..., [ latch, vreg2 ] | |
1266 | // vreg2 = ADD vreg1, imm | |
1267 | typedef MachineBasicBlock::instr_iterator instr_iterator; | |
1268 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |
1269 | I != E && I->isPHI(); ++I) { | |
1270 | MachineInstr *Phi = &*I; | |
1271 | ||
1272 | // Have a PHI instruction. | |
1273 | for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { | |
1274 | if (Phi->getOperand(i+1).getMBB() != Latch) | |
1275 | continue; | |
1276 | ||
1277 | unsigned PhiReg = Phi->getOperand(i).getReg(); | |
1278 | MachineInstr *DI = MRI->getVRegDef(PhiReg); | |
1279 | unsigned UpdOpc = DI->getOpcode(); | |
1280 | bool isAdd = (UpdOpc == Hexagon::ADD_ri); | |
1281 | ||
1282 | if (isAdd) { | |
1283 | // If the register operand to the add/sub is the PHI we are looking | |
1284 | // at, this meets the induction pattern. | |
1285 | unsigned IndReg = DI->getOperand(1).getReg(); | |
1286 | if (MRI->getVRegDef(IndReg) == Phi) { | |
1287 | unsigned UpdReg = DI->getOperand(0).getReg(); | |
1288 | int64_t V = DI->getOperand(2).getImm(); | |
1289 | IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); | |
223e47cc | 1290 | } |
223e47cc | 1291 | } |
970d7e83 LB |
1292 | } // for (i) |
1293 | } // for (instr) | |
1294 | ||
1295 | if (IndRegs.empty()) | |
1296 | return false; | |
1297 | ||
1a4d82fc | 1298 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
970d7e83 LB |
1299 | SmallVector<MachineOperand,2> Cond; |
1300 | // AnalyzeBranch returns true if it fails to analyze branch. | |
1301 | bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); | |
1302 | if (NotAnalyzed) | |
1303 | return false; | |
1304 | ||
1305 | // Check if the latch branch is unconditional. | |
1306 | if (Cond.empty()) | |
1307 | return false; | |
1308 | ||
1309 | if (TB != Header && FB != Header) | |
1310 | // The latch does not go back to the header. Not a latch we know and love. | |
1311 | return false; | |
1312 | ||
1313 | // Expecting a predicate register as a condition. It won't be a hardware | |
1314 | // predicate register at this point yet, just a vreg. | |
1315 | // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) | |
1316 | // into Cond, followed by the predicate register. For non-negated branches | |
1317 | // it's just the register. | |
1318 | unsigned CSz = Cond.size(); | |
1319 | if (CSz != 1 && CSz != 2) | |
1320 | return false; | |
1321 | ||
1322 | unsigned P = Cond[CSz-1].getReg(); | |
1323 | MachineInstr *PredDef = MRI->getVRegDef(P); | |
1324 | ||
1325 | if (!PredDef->isCompare()) | |
1326 | return false; | |
1327 | ||
1328 | SmallSet<unsigned,2> CmpRegs; | |
1a4d82fc | 1329 | MachineOperand *CmpImmOp = nullptr; |
970d7e83 LB |
1330 | |
1331 | // Go over all operands to the compare and look for immediate and register | |
1332 | // operands. Assume that if the compare has a single register use and a | |
1333 | // single immediate operand, then the register is being compared with the | |
1334 | // immediate value. | |
1335 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { | |
1336 | MachineOperand &MO = PredDef->getOperand(i); | |
1337 | if (MO.isReg()) { | |
1338 | // Skip all implicit references. In one case there was: | |
1339 | // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> | |
1340 | if (MO.isImplicit()) | |
1341 | continue; | |
1342 | if (MO.isUse()) { | |
1343 | unsigned R = MO.getReg(); | |
1344 | if (!defWithImmediate(R)) { | |
1345 | CmpRegs.insert(MO.getReg()); | |
1346 | continue; | |
1347 | } | |
1348 | // Consider the register to be the "immediate" operand. | |
1349 | if (CmpImmOp) | |
1350 | return false; | |
1351 | CmpImmOp = &MO; | |
1352 | } | |
1353 | } else if (MO.isImm()) { | |
1354 | if (CmpImmOp) // A second immediate argument? Confusing. Bail out. | |
1355 | return false; | |
1356 | CmpImmOp = &MO; | |
223e47cc LB |
1357 | } |
1358 | } | |
1359 | ||
970d7e83 LB |
1360 | if (CmpRegs.empty()) |
1361 | return false; | |
1362 | ||
1363 | // Check if the compared register follows the order we want. Fix if needed. | |
1364 | for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); | |
1365 | I != E; ++I) { | |
1366 | // This is a success. If the register used in the comparison is one that | |
1367 | // we have identified as a bumped (updated) induction register, there is | |
1368 | // nothing to do. | |
1369 | if (CmpRegs.count(I->first)) | |
1370 | return true; | |
1371 | ||
1372 | // Otherwise, if the register being compared comes out of a PHI node, | |
1373 | // and has been recognized as following the induction pattern, and is | |
1374 | // compared against an immediate, we can fix it. | |
1375 | const RegisterBump &RB = I->second; | |
1376 | if (CmpRegs.count(RB.first)) { | |
1377 | if (!CmpImmOp) | |
1378 | return false; | |
1379 | ||
1380 | int64_t CmpImm = getImmediate(*CmpImmOp); | |
1381 | int64_t V = RB.second; | |
1382 | if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). | |
1383 | return false; | |
1384 | if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). | |
1385 | return false; | |
1386 | CmpImm += V; | |
1387 | // Some forms of cmp-immediate allow u9 and s10. Assume the worst case | |
1388 | // scenario, i.e. an 8-bit value. | |
1389 | if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) | |
1390 | return false; | |
1391 | ||
1392 | // Make sure that the compare happens after the bump. Otherwise, | |
1393 | // after the fixup, the compare would use a yet-undefined register. | |
1394 | MachineInstr *BumpI = MRI->getVRegDef(I->first); | |
1395 | bool Order = orderBumpCompare(BumpI, PredDef); | |
1396 | if (!Order) | |
1397 | return false; | |
1398 | ||
1399 | // Finally, fix the compare instruction. | |
1400 | setImmediate(*CmpImmOp, CmpImm); | |
1401 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { | |
1402 | MachineOperand &MO = PredDef->getOperand(i); | |
1403 | if (MO.isReg() && MO.getReg() == RB.first) { | |
1404 | MO.setReg(I->first); | |
1405 | return true; | |
1406 | } | |
1407 | } | |
1408 | } | |
1409 | } | |
223e47cc | 1410 | |
970d7e83 | 1411 | return false; |
223e47cc LB |
1412 | } |
1413 | ||
970d7e83 LB |
1414 | |
1415 | /// \brief Create a preheader for a given loop. | |
1416 | MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( | |
1417 | MachineLoop *L) { | |
1418 | if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) | |
1419 | return TmpPH; | |
1420 | ||
1421 | MachineBasicBlock *Header = L->getHeader(); | |
1422 | MachineBasicBlock *Latch = L->getLoopLatch(); | |
1423 | MachineFunction *MF = Header->getParent(); | |
1424 | DebugLoc DL; | |
1425 | ||
1426 | if (!Latch || Header->hasAddressTaken()) | |
1a4d82fc | 1427 | return nullptr; |
970d7e83 LB |
1428 | |
1429 | typedef MachineBasicBlock::instr_iterator instr_iterator; | |
970d7e83 LB |
1430 | |
1431 | // Verify that all existing predecessors have analyzable branches | |
1432 | // (or no branches at all). | |
1433 | typedef std::vector<MachineBasicBlock*> MBBVector; | |
1434 | MBBVector Preds(Header->pred_begin(), Header->pred_end()); | |
1435 | SmallVector<MachineOperand,2> Tmp1; | |
1a4d82fc | 1436 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
970d7e83 LB |
1437 | |
1438 | if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) | |
1a4d82fc | 1439 | return nullptr; |
970d7e83 LB |
1440 | |
1441 | for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { | |
1442 | MachineBasicBlock *PB = *I; | |
1443 | if (PB != Latch) { | |
1444 | bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); | |
1445 | if (NotAnalyzed) | |
1a4d82fc | 1446 | return nullptr; |
970d7e83 LB |
1447 | } |
1448 | } | |
1449 | ||
1450 | MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); | |
1451 | MF->insert(Header, NewPH); | |
1452 | ||
1453 | if (Header->pred_size() > 2) { | |
1454 | // Ensure that the header has only two predecessors: the preheader and | |
1455 | // the loop latch. Any additional predecessors of the header should | |
1456 | // join at the newly created preheader. Inspect all PHI nodes from the | |
1457 | // header and create appropriate corresponding PHI nodes in the preheader. | |
1458 | ||
1459 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |
1460 | I != E && I->isPHI(); ++I) { | |
1461 | MachineInstr *PN = &*I; | |
1462 | ||
1463 | const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); | |
1464 | MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); | |
1465 | NewPH->insert(NewPH->end(), NewPN); | |
1466 | ||
1467 | unsigned PR = PN->getOperand(0).getReg(); | |
1468 | const TargetRegisterClass *RC = MRI->getRegClass(PR); | |
1469 | unsigned NewPR = MRI->createVirtualRegister(RC); | |
1470 | NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); | |
1471 | ||
1472 | // Copy all non-latch operands of a header's PHI node to the newly | |
1473 | // created PHI node in the preheader. | |
1474 | for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { | |
1475 | unsigned PredR = PN->getOperand(i).getReg(); | |
1476 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); | |
1477 | if (PredB == Latch) | |
1478 | continue; | |
1479 | ||
1480 | NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); | |
1481 | NewPN->addOperand(MachineOperand::CreateMBB(PredB)); | |
1482 | } | |
1483 | ||
1484 | // Remove copied operands from the old PHI node and add the value | |
1485 | // coming from the preheader's PHI. | |
1486 | for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { | |
1487 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); | |
1488 | if (PredB != Latch) { | |
1489 | PN->RemoveOperand(i+1); | |
1490 | PN->RemoveOperand(i); | |
1491 | } | |
1492 | } | |
1493 | PN->addOperand(MachineOperand::CreateReg(NewPR, false)); | |
1494 | PN->addOperand(MachineOperand::CreateMBB(NewPH)); | |
1495 | } | |
1496 | ||
223e47cc | 1497 | } else { |
970d7e83 LB |
1498 | assert(Header->pred_size() == 2); |
1499 | ||
1500 | // The header has only two predecessors, but the non-latch predecessor | |
1501 | // is not a preheader (e.g. it has other successors, etc.) | |
1502 | // In such a case we don't need any extra PHI nodes in the new preheader, | |
1503 | // all we need is to adjust existing PHIs in the header to now refer to | |
1504 | // the new preheader. | |
1505 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | |
1506 | I != E && I->isPHI(); ++I) { | |
1507 | MachineInstr *PN = &*I; | |
1508 | for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { | |
1509 | MachineOperand &MO = PN->getOperand(i+1); | |
1510 | if (MO.getMBB() != Latch) | |
1511 | MO.setMBB(NewPH); | |
1512 | } | |
1513 | } | |
1514 | } | |
1515 | ||
1516 | // "Reroute" the CFG edges to link in the new preheader. | |
1517 | // If any of the predecessors falls through to the header, insert a branch | |
1518 | // to the new preheader in that place. | |
1519 | SmallVector<MachineOperand,1> Tmp2; | |
1520 | SmallVector<MachineOperand,1> EmptyCond; | |
1521 | ||
1a4d82fc | 1522 | TB = FB = nullptr; |
970d7e83 LB |
1523 | |
1524 | for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { | |
1525 | MachineBasicBlock *PB = *I; | |
1526 | if (PB != Latch) { | |
1527 | Tmp2.clear(); | |
1528 | bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); | |
1a4d82fc | 1529 | (void)NotAnalyzed; // suppress compiler warning |
970d7e83 LB |
1530 | assert (!NotAnalyzed && "Should be analyzable!"); |
1531 | if (TB != Header && (Tmp2.empty() || FB != Header)) | |
1a4d82fc | 1532 | TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); |
970d7e83 LB |
1533 | PB->ReplaceUsesOfBlockWith(Header, NewPH); |
1534 | } | |
1535 | } | |
1536 | ||
1537 | // It can happen that the latch block will fall through into the header. | |
1538 | // Insert an unconditional branch to the header. | |
1a4d82fc | 1539 | TB = FB = nullptr; |
970d7e83 | 1540 | bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); |
1a4d82fc | 1541 | (void)LatchNotAnalyzed; // suppress compiler warning |
970d7e83 LB |
1542 | assert (!LatchNotAnalyzed && "Should be analyzable!"); |
1543 | if (!TB && !FB) | |
1a4d82fc | 1544 | TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); |
970d7e83 LB |
1545 | |
1546 | // Finally, the branch from the preheader to the header. | |
1a4d82fc | 1547 | TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); |
970d7e83 LB |
1548 | NewPH->addSuccessor(Header); |
1549 | ||
1550 | return NewPH; | |
223e47cc | 1551 | } |