]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===----- SchedulePostRAList.cpp - list scheduler ------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This implements a top-down list scheduler, using standard algorithms. | |
11 | // The basic approach uses a priority queue of available nodes to schedule. | |
12 | // One at a time, nodes are taken from the priority queue (thus in priority | |
13 | // order), checked for legality to schedule, and emitted if legal. | |
14 | // | |
15 | // Nodes may not be legal to schedule either due to structural hazards (e.g. | |
16 | // pipeline or resource constraints) or because an input to the instruction has | |
17 | // not completed execution. | |
18 | // | |
19 | //===----------------------------------------------------------------------===// | |
20 | ||
970d7e83 | 21 | #include "llvm/CodeGen/Passes.h" |
223e47cc | 22 | #include "AggressiveAntiDepBreaker.h" |
970d7e83 | 23 | #include "AntiDepBreaker.h" |
223e47cc | 24 | #include "CriticalAntiDepBreaker.h" |
970d7e83 LB |
25 | #include "llvm/ADT/BitVector.h" |
26 | #include "llvm/ADT/Statistic.h" | |
27 | #include "llvm/Analysis/AliasAnalysis.h" | |
223e47cc | 28 | #include "llvm/CodeGen/LatencyPriorityQueue.h" |
223e47cc LB |
29 | #include "llvm/CodeGen/MachineDominators.h" |
30 | #include "llvm/CodeGen/MachineFrameInfo.h" | |
31 | #include "llvm/CodeGen/MachineFunctionPass.h" | |
32 | #include "llvm/CodeGen/MachineLoopInfo.h" | |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
34 | #include "llvm/CodeGen/RegisterClassInfo.h" | |
35 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" | |
36 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" | |
970d7e83 | 37 | #include "llvm/CodeGen/SchedulerRegistry.h" |
223e47cc LB |
38 | #include "llvm/Support/CommandLine.h" |
39 | #include "llvm/Support/Debug.h" | |
40 | #include "llvm/Support/ErrorHandling.h" | |
41 | #include "llvm/Support/raw_ostream.h" | |
970d7e83 LB |
42 | #include "llvm/Target/TargetInstrInfo.h" |
43 | #include "llvm/Target/TargetLowering.h" | |
970d7e83 LB |
44 | #include "llvm/Target/TargetRegisterInfo.h" |
45 | #include "llvm/Target/TargetSubtargetInfo.h" | |
223e47cc LB |
using namespace llvm;

#define DEBUG_TYPE "post-RA-sched"

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");

// Post-RA scheduling is enabled with
// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
                       cl::desc("Enable scheduling after register allocation"),
                       cl::init(false), cl::Hidden);
// Overrides the subtarget's anti-dependence breaking mode when given on the
// command line; see the override check in runOnMachineFunction.
static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies: "
                               "\"critical\", \"all\", or \"none\""),
                      cl::init("none"), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);

// Out-of-line definition anchors AntiDepBreaker's vtable in this object file.
AntiDepBreaker::~AntiDepBreaker() { }
79 | namespace { | |
  /// PostRAScheduler - Machine-function pass that drives post-register-
  /// allocation list scheduling.  runOnMachineFunction carves each basic
  /// block into scheduling regions and hands them to SchedulePostRATDList.
  class PostRAScheduler : public MachineFunctionPass {
    const TargetInstrInfo *TII;     // Cached per-function in runOnMachineFunction.
    RegisterClassInfo RegClassInfo; // Per-function register class info.

  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      // Instructions are only reordered within blocks, so the CFG, dominator
      // tree, and loop info all survive this pass.
      AU.setPreservesCFG();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<TargetPassConfig>();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &Fn) override;

    /// Query the subtarget for the anti-dependence breaking mode and the
    /// critical-path register classes (both out-parameters), and report
    /// whether post-RA scheduling should run at this optimization level.
    bool enablePostRAScheduler(
        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
        TargetSubtargetInfo::AntiDepBreakMode &Mode,
        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
  };
  char PostRAScheduler::ID = 0;
107 | ||
  /// SchedulePostRATDList - Top-down list scheduler for one post-RA
  /// scheduling region, with optional anti-dependence breaking and a
  /// target hazard recognizer.
  class SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation).  Once the operands becomes available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
    AntiDepBreaker *AntiDepBreak;

    /// AA - AliasAnalysis for making memory reference queries.
    AliasAnalysis *AA;

    /// The schedule.  Null SUnit*'s represent noop instructions.
    std::vector<SUnit*> Sequence;

    /// The index in BB of RegionEnd.
    ///
    /// This is the instruction number from the top of the current block, not
    /// the SlotIndex.  It is only used by the AntiDepBreaker.
    unsigned EndIndex;

  public:
    SchedulePostRATDList(
        MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
        const RegisterClassInfo &,
        TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
        SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);

    ~SchedulePostRATDList();

    /// startBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void startBlock(MachineBasicBlock *BB) override;

    // Set the index of RegionEnd within the current BB.
    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }

    /// Initialize the scheduler state for the next scheduling region.
    void enterRegion(MachineBasicBlock *bb,
                     MachineBasicBlock::iterator begin,
                     MachineBasicBlock::iterator end,
                     unsigned regioninstrs) override;

    /// Notify that the scheduler has finished scheduling the current region.
    void exitRegion() override;

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void schedule() override;

    /// EmitSchedule - Splice the instructions back into the block in
    /// Sequence order, inserting noops for null entries.
    void EmitSchedule();

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr *MI, unsigned Count);

    /// finishBlock - Clean up register live-range state.
    ///
    void finishBlock() override;

  private:
    // Edge/queue bookkeeping helpers for the top-down list-scheduling loop.
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();

    void dumpSchedule() const;
    void emitNoop(unsigned CurCycle);
  };
187 | } | |
188 | ||
// Opaque pass identifier exported so TargetPassConfig can refer to this pass.
char &llvm::PostRASchedulerID = PostRAScheduler::ID;

INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
                "Post RA top-down list latency scheduler", false, false)
193 | ||
SchedulePostRATDList::SchedulePostRATDList(
    MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
    const RegisterClassInfo &RCI,
    TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
    SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
  : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {

  // Let the target create its post-RA hazard recognizer; `this` is passed so
  // the recognizer has access to the scheduling DAG.
  const InstrItineraryData *InstrItins =
      MF.getSubtarget().getInstrItineraryData();
  HazardRec =
      MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
          InstrItins, this);

  // Anti-dependence breaking renames physical registers, which is only sound
  // when MI-level liveness information is still accurate.
  assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
          MRI.tracksLiveness()) &&
         "Live-ins must be accurate for anti-dependency breaking");
  // Pick the breaker implementation matching the requested mode; nullptr
  // means anti-dependence breaking is disabled.
  AntiDepBreak =
    ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
     ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
}
216 | ||
217 | SchedulePostRATDList::~SchedulePostRATDList() { | |
218 | delete HazardRec; | |
219 | delete AntiDepBreak; | |
220 | } | |
221 | ||
222 | /// Initialize state associated with the next scheduling region. | |
223 | void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, | |
224 | MachineBasicBlock::iterator begin, | |
225 | MachineBasicBlock::iterator end, | |
1a4d82fc JJ |
226 | unsigned regioninstrs) { |
227 | ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); | |
223e47cc LB |
228 | Sequence.clear(); |
229 | } | |
230 | ||
/// Print the schedule before exiting the region.
void SchedulePostRATDList::exitRegion() {
  // Dump the final per-region schedule (debug builds only), then let the
  // base class tear down region state.
  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
  ScheduleDAGInstrs::exitRegion();
}
240 | ||
241 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |
242 | /// dumpSchedule - dump the scheduled Sequence. | |
243 | void SchedulePostRATDList::dumpSchedule() const { | |
244 | for (unsigned i = 0, e = Sequence.size(); i != e; i++) { | |
245 | if (SUnit *SU = Sequence[i]) | |
246 | SU->dump(this); | |
247 | else | |
248 | dbgs() << "**** NOOP ****\n"; | |
249 | } | |
250 | } | |
251 | #endif | |
252 | ||
/// Query the subtarget for anti-dependence breaking mode and critical-path
/// register classes (returned through the out-parameters), and decide
/// whether post-RA scheduling should run at this optimization level.
bool PostRAScheduler::enablePostRAScheduler(
    const TargetSubtargetInfo &ST,
    CodeGenOpt::Level OptLevel,
    TargetSubtargetInfo::AntiDepBreakMode &Mode,
    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
  Mode = ST.getAntiDepBreakMode();
  ST.getCriticalPathRCs(CriticalPathRCs);
  // NOTE(review): this gates the post-RA *list* scheduler on the subtarget's
  // enablePostMachineScheduler() hook while the opt-level threshold comes
  // from getOptLevelToEnablePostRAScheduler() — confirm the mixed hooks are
  // intended and not a mix-up with enablePostRAScheduler().
  return ST.enablePostMachineScheduler() &&
         OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
263 | ||
/// Run post-RA list scheduling over every basic block of Fn.  Each block is
/// walked bottom-up and split into regions at calls and target scheduling
/// boundaries; each region is scheduled independently.
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  // Respect the 'optnone' function attribute.
  if (skipOptnoneFunction(*Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();

  RegClassInfo.runOnMachineFunction(Fn);

  // Check for explicit enable/disable of post-ra scheduling.
  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
    TargetSubtargetInfo::ANTIDEP_NONE;
  SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
  if (EnablePostRAScheduler.getPosition() > 0) {
    // An explicit command-line setting overrides the target's preference.
    if (!EnablePostRAScheduler)
      return false;
  } else {
    // Check that post-RA scheduling is enabled for this target.
    // This may upgrade the AntiDepMode.
    const TargetSubtargetInfo &ST =
        Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
    if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
                               AntiDepMode, CriticalPathRCs))
      return false;
  }

  // Check for antidep breaking override...
  if (EnableAntiDepBreaking.getPosition() > 0) {
    AntiDepMode = (EnableAntiDepBreaking == "all")
      ? TargetSubtargetInfo::ANTIDEP_ALL
      : ((EnableAntiDepBreaking == "critical")
         ? TargetSubtargetInfo::ANTIDEP_CRITICAL
         : TargetSubtargetInfo::ANTIDEP_NONE);
  }

  DEBUG(dbgs() << "PostRAScheduler\n");

  SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
                                 CriticalPathRCs);

  // Loop over all of the basic blocks
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      dbgs() << "*** DEBUG scheduling " << Fn.getName()
             << ":BB#" << MBB->getNumber() << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.startBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    // The block is walked bottom-up: Current marks the (exclusive) end of
    // the region currently being accumulated.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = std::prev(I);
      --Count;
      // Calls are not scheduling boundaries before register allocation, but
      // post-ra we don't gain anything by scheduling across calls since we
      // don't need to worry about register pressure.
      if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
        // Schedule the region (I, Current); the boundary instruction MI is
        // not part of it and is only observed for liveness tracking below.
        Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
        Scheduler.setEndIndex(CurrentCount);
        Scheduler.schedule();
        Scheduler.exitRegion();
        Scheduler.EmitSchedule();
        Current = MI;
        CurrentCount = Count;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      // A bundle header covers its bundled instructions; account for them.
      if (MI->isBundle())
        Count -= MI->getBundleSize();
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    // Schedule the final (topmost) region of the block.
    Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.setEndIndex(CurrentCount);
    Scheduler.schedule();
    Scheduler.exitRegion();
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.finishBlock();

    // Update register kills
    Scheduler.fixupKills(MBB);
  }

  return true;
}
365 | ||
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::startBlock(BB);

  // Reset the hazard recognizer and anti-dep breaker; scheduling state must
  // not leak across blocks.
  HazardRec->Reset();
  if (AntiDepBreak)
    AntiDepBreak->StartBlock(BB);
}
378 | ||
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::schedule() {
  // Build the scheduling graph.
  buildSchedGraph(AA);

  // Optionally break anti-dependencies first; if any registers were renamed
  // the dependence graph is stale and must be rebuilt.
  if (AntiDepBreak) {
    unsigned Broken =
      AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
                                          EndIndex, DbgValues);

    if (Broken != 0) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      ScheduleDAG::clearDAG();
      buildSchedGraph(AA);

      NumFixedAnti += Broken;
    }
  }

  DEBUG(dbgs() << "********** List Scheduling **********\n");
  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  // Run the top-down list-scheduling loop over the freshly built DAG.
  AvailableQueue.initNodes(SUnits);
  ListScheduleTopDown();
  AvailableQueue.releaseState();
}
412 | ||
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.  Only the anti-dependence
/// breaker keeps state that must see such instructions.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  if (AntiDepBreak)
    AntiDepBreak->Observe(MI, Count, EndIndex);
}
420 | ||
/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::finishBlock() {
  // Tear down anti-dep breaker state before the base class releases its
  // per-block structures.
  if (AntiDepBreak)
    AntiDepBreak->FinishBlock();

  // Call the superclass.
  ScheduleDAGInstrs::finishBlock();
}
430 | ||
223e47cc LB |
431 | //===----------------------------------------------------------------------===// |
432 | // Top-Down Scheduling | |
433 | //===----------------------------------------------------------------------===// | |
434 | ||
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  // Weak edges never gate readiness; just track them separately.
  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Standard scheduler algorithms will recompute the depth of the successor
  // here as such:
  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
  //
  // However, we lazily compute node depth instead. Note that
  // ScheduleNodeTopDown has already updated the depth of this node which causes
  // all descendents to be marked dirty. Setting the successor depth explicitly
  // here would cause depth to be recomputed for all its ancestors. If the
  // successor is not yet ready (because of a transitively redundant edge) then
  // this causes depth computation to be quadratic in the size of the DAG.

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}
470 | ||
471 | /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. | |
472 | void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { | |
473 | for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | |
474 | I != E; ++I) { | |
475 | ReleaseSucc(SU, &*I); | |
476 | } | |
477 | } | |
478 | ||
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() &&
         "Node scheduled above its depth!");
  // Record the issue cycle; descendants' depths are recomputed lazily
  // (see the comment in ReleaseSucc).
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.scheduledNode(SU);
}
495 | ||
/// emitNoop - Add a noop to the current instruction sequence.  The hazard
/// recognizer is notified so its pipeline model advances, and the schedule
/// records a null entry that EmitSchedule turns into a real noop.
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
  DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
  HazardRec->EmitNoop();
  Sequence.push_back(nullptr);   // NULL here means noop
  ++NumNoops;
}
503 | ||
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // We're scheduling top-down but we're visiting the regions in
  // bottom-up order, so we don't know the hazards at the start of a
  // region. So assume no hazards (this should usually be ok as most
  // blocks are a single region).
  HazardRec->Reset();

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.  (Swap-with-back keeps the scan
    // O(n) while erasing ready entries in place.)
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));

    // Pop nodes off the queue until we find one with no hazard that the
    // hazard recognizer does not ask us to defer.
    SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
          if (!NotPreferredSUnit) {
            // If this is the first non-preferred node for this cycle, then
            // record it and continue searching for a preferred node. If this
            // is not the first non-preferred node, then treat it as though
            // there had been a hazard.
            NotPreferredSUnit = CurSUnit;
            continue;
          }
        } else {
          FoundSUnit = CurSUnit;
          break;
        }
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // If we have a non-preferred node, push it back onto the available list.
    // If we did not find a preferred node, then schedule this first
    // non-preferred node.
    if (NotPreferredSUnit) {
      if (!FoundSUnit) {
        DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
        FoundSUnit = NotPreferredSUnit;
      } else {
        AvailableQueue.push(NotPreferredSUnit);
      }

      NotPreferredSUnit = nullptr;
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule...
    if (FoundSUnit) {
      // If we need to emit noops prior to this instruction, then do so.
      unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
      for (unsigned i = 0; i != NumPreNoops; ++i)
        emitNoop(CurCycle);

      // ... schedule the node...
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;
      if (HazardRec->atIssueLimit()) {
        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++CurCycle;
        CycleHasInsts = false;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        emitNoop(CurCycle);
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  // Sanity check: every SUnit must appear exactly once; the remaining
  // Sequence entries are the noops we inserted.
  unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
  unsigned Noops = 0;
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
    if (!Sequence[i])
      ++Noops;
  assert(Sequence.size() - Noops == ScheduledNodes &&
         "The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
}
650 | ||
// EmitSchedule - Emit the machine code in scheduled order.  Splices the
// region's instructions back into the block following Sequence, inserting a
// target noop for each null entry, then restores the DBG_VALUEs that were
// detached while building the DAG.
void SchedulePostRATDList::EmitSchedule() {
  // The region collapses to its end point; instructions are re-inserted
  // in front of RegionEnd one at a time below.
  RegionBegin = RegionEnd;

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(RegionEnd, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(RegionEnd, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      TII->insertNoop(*BB, RegionEnd);

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      RegionBegin = std::prev(RegionEnd);
  }

  // Reinsert any remaining debug_values, iterating the saved pairs in
  // reverse so each DBG_VALUE lands right after its anchor instruction.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}