//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode>
SplitSpillMode("split-spill-mode", cl::Hidden,
  cl::desc("Spill mode for splitting live ranges"),
  cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
             clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
             clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
             clEnumValEnd),
  cl::init(SplitEditor::SM_Partition));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interferences at a time"),
    cl::init(8));
77 | ||
static cl::opt<bool>
ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
                 cl::desc("Exhaustive Search for registers bypassing the depth "
                          "and interference cutoffs of last chance recoloring"));

static cl::opt<bool> EnableLocalReassignment(
    "enable-local-reassign", cl::Hidden,
    cl::desc("Local reassignment can yield better allocation decisions, but "
             "may be compile time intensive"),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

namespace {
class RAGreedy : public MachineFunctionPass,
                 public RegAllocBase,
                 private LiveRangeEdit::Delegate {
  // Convenient shortcuts.
  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
  typedef SmallSet<unsigned, 16> SmallVirtRegSet;

  // context
  MachineFunction *MF;

  // Shortcuts to some useful interface.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  RegisterClassInfo RCI;

  // analyses
  SlotIndexes *Indexes;
  MachineBlockFrequencyInfo *MBFI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  EdgeBundles *Bundles;
  SpillPlacement *SpillPlacer;
  LiveDebugVariables *DebugVars;

  // state
  std::unique_ptr<Spiller> SpillerInstance;
  PQueue Queue;
  unsigned NextCascade;

  // Live ranges pass through a number of stages as we try to allocate them.
  // Some of the stages may also create new live ranges:
  //
  // - Region splitting.
  // - Per-block splitting.
  // - Local splitting.
  // - Spilling.
  //
  // Ranges produced by one of the stages skip the previous stages when they are
  // dequeued. This improves performance because we can skip interference checks
  // that are unlikely to give any results. It also guarantees that the live
  // range splitting algorithm terminates, something that is otherwise hard to
  // ensure.
  enum LiveRangeStage {
    /// Newly created live range that has never been queued.
    RS_New,

    /// Only attempt assignment and eviction. Then requeue as RS_Split.
    RS_Assign,

    /// Attempt live range splitting if assignment is impossible.
    RS_Split,

    /// Attempt more aggressive live range splitting that is guaranteed to make
    /// progress. This is used for split products that may not be making
    /// progress.
    RS_Split2,

    /// Live range will be spilled. No more splitting will be attempted.
    RS_Spill,

    /// There is nothing more we can do to this live range. Abort compilation
    /// if it can't be assigned.
    RS_Done
  };
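  // A live range normally moves forward through these stages:
  //   RS_New -> RS_Assign -> RS_Split -> RS_Split2 -> RS_Spill -> RS_Done.
  // Ranges created by splitting start out as RS_New; the range-based
  // setStage() below promotes them to the stage their creator requests, which
  // is how they skip the rounds their parent has already been through.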

  // Enum CutOffStage to keep track of whether the register allocation failed
  // because of the cutoffs encountered in last chance recoloring.
  // Note: This is used as a bitmask. New value should be next power of 2.
  enum CutOffStage {
    // No cutoffs encountered
    CO_None = 0,

    // lcr-max-depth cutoff encountered
    CO_Depth = 1,

    // lcr-max-interf cutoff encountered
    CO_Interf = 2
  };

  uint8_t CutOffInfo;

#ifndef NDEBUG
  static const char *const StageName[];
#endif

  // RegInfo - Keep additional information about each live range.
  struct RegInfo {
    LiveRangeStage Stage;

    // Cascade - Eviction loop prevention. See canEvictInterference().
    unsigned Cascade;

    RegInfo() : Stage(RS_New), Cascade(0) {}
  };

  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;

  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return ExtraRegInfo[VirtReg.reg].Stage;
  }

  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    ExtraRegInfo[VirtReg.reg].Stage = Stage;
  }

  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    for (;Begin != End; ++Begin) {
      unsigned Reg = *Begin;
      if (ExtraRegInfo[Reg].Stage == RS_New)
        ExtraRegInfo[Reg].Stage = NewStage;
    }
  }

  /// Cost of evicting interference.
  struct EvictionCost {
    unsigned BrokenHints; ///< Total number of broken hints.
    float MaxWeight;      ///< Maximum spill weight evicted.

    EvictionCost(): BrokenHints(0), MaxWeight(0) {}

    bool isMax() const { return BrokenHints == ~0u; }

    void setMax() { BrokenHints = ~0u; }

    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }

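    // Note: costs compare lexicographically, so an eviction that breaks fewer
    // hints always beats one that breaks more; the maximum evicted spill
    // weight only acts as a tie breaker.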
    bool operator<(const EvictionCost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };

  // splitting state.
  std::unique_ptr<SplitAnalysis> SA;
  std::unique_ptr<SplitEditor> SE;

  /// Cached per-block interference maps
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    // Register intended for assignment, or 0.
    unsigned PhysReg;

    // SplitKit interval index for this candidate.
    unsigned IntvIdx;

    // Interference for PhysReg.
    InterferenceCache::Cursor Intf;

    // Bundles where this candidate should be live.
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    void reset(InterferenceCache &Cache, unsigned Reg) {
      PhysReg = Reg;
      IntvIdx = 0;
      Intf.setPhysReg(Cache, Reg);
      LiveBundles.clear();
      ActiveBlocks.clear();
    }

    // Set B[i] = C for every live bundle where B[i] was NoCand.
    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (int i = LiveBundles.find_first(); i >= 0;
           i = LiveBundles.find_next(i))
        if (B[i] == NoCand) {
          B[i] = C;
          Count++;
        }
      return Count;
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  enum : unsigned { NoCand = ~0u };

  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
  /// NoCand which indicates the stack interval.
  SmallVector<unsigned, 32> BundleCand;

  /// Callee-save register cost, calculated once per machine function.
  BlockFrequency CSRCost;

  /// Run or not the local reassignment heuristic. This information is
  /// obtained from the TargetSubtargetInfo.
  bool EnableLocalReassign;

  /// Set of broken hints that may be reconciled later because of eviction.
  SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;

public:
  RAGreedy();

  /// Return the pass name.
  const char* getPassName() const override {
    return "Greedy Register Allocator";
  }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  void releaseMemory() override;
  Spiller &spiller() override { return *SpillerInstance; }
  void enqueue(LiveInterval *LI) override;
  LiveInterval *dequeue() override;
  unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
  void aboutToRemoveInterval(LiveInterval &) override;

  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  static char ID;

private:
  unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
                             SmallVirtRegSet &, unsigned = 0);

  bool LRE_CanEraseVirtReg(unsigned) override;
  void LRE_WillShrinkVirtReg(unsigned) override;
  void LRE_DidCloneVirtReg(unsigned, unsigned) override;
  void enqueue(PQueue &CurQueue, LiveInterval *LI);
  LiveInterval *dequeue(PQueue &CurQueue);

  BlockFrequency calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  void growRegion(GlobalSplitCandidate &Cand);
  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
  bool calcCompactRegion(GlobalSplitCandidate&);
  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
  void evictInterference(LiveInterval&, unsigned,
                         SmallVectorImpl<unsigned>&);
  bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                  SmallLISet &RecoloringCandidates,
                                  const SmallVirtRegSet &FixedRegisters);

  unsigned tryAssign(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<unsigned>&);
  unsigned tryEvict(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<unsigned>&);
  /// Calculate cost of region splitting.
  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    BlockFrequency &BestCost,
                                    unsigned &NumCands, bool IgnoreCSR);
  /// Perform region splitting.
  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                         bool HasCompact,
                         SmallVectorImpl<unsigned> &NewVRegs);
  /// Check other options before using a callee-saved register for the first
  /// time.
  unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                 unsigned PhysReg, unsigned &CostPerUseLimit,
                                 SmallVectorImpl<unsigned> &NewVRegs);
  void initializeCSRCost();
  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<unsigned>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&);
  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                   SmallVectorImpl<unsigned> &,
                                   SmallVirtRegSet &, unsigned);
  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
                               SmallVirtRegSet &, unsigned);
  void tryHintRecoloring(LiveInterval &);
  void tryHintsRecoloring();

  /// Model the information carried by one end of a copy.
  struct HintInfo {
    /// The frequency of the copy.
    BlockFrequency Freq;
    /// The virtual register or physical register.
    unsigned Reg;
    /// Its currently assigned register.
    /// In case of a physical register Reg == PhysReg.
    unsigned PhysReg;
    HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
        : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
  };
  typedef SmallVector<HintInfo, 4> HintsInfo;
  BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
  void collectHintInfo(unsigned, HintsInfo &);
};
} // end anonymous namespace

char RAGreedy::ID = 0;

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addPreserved<AliasAnalysis>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  MachineFunctionPass::getAnalysisUsage(AU);
}


//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    LiveInterval &LI = LIS->getInterval(VirtReg);
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  // The new register gets the same stage as the parent.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  ExtraRegInfo.clear();
  GlobalCand.clear();
}

void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;

  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
      }
    }
    else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
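  // To summarize the priority word: bit 31 is set for every range that is not
  // deferred to the RS_Split round, bit 30 marks ranges with a known register
  // preference, bit 29 marks global or oversized ranges, and the low bits
  // hold either the range's size or, for local ranges, its position within
  // the function.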
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
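  // The queue is a max-heap over (Prio, ~Reg) pairs, so among equal priorities
  // the largest ~Reg, i.e. the lowest virtual register number, comes out
  // first. Flip the bits back to recover the register.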
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}


//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<unsigned> &NewVRegs) {
  Order.rewind();
  unsigned PhysReg;
  while ((PhysReg = Order.next()))
    if (!Matrix->checkInterference(VirtReg, PhysReg))
      break;
  if (!PhysReg || Order.isHint())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
    if (Order.isHint(Hint)) {
      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
      EvictionCost MaxCost;
      MaxCost.setBrokenHints(1);
      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
        evictInterference(VirtReg, Hint, NewVRegs);
        return Hint;
      }
    }

  // Try to evict interference from a cheaper alternative.
  unsigned Cost = TRI->getCostPerUse(PhysReg);

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
               << '\n');
  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
  return CheapReg ? CheapReg : PhysReg;
}


//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
  unsigned PhysReg;
  while ((PhysReg = Order.next())) {
    if (PhysReg == PrevReg)
      continue;

    MCRegUnitIterator Units(PhysReg, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      break;
  }
  if (PhysReg)
    DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
                 << '\n');
  return PhysReg;
}

/// shouldEvict - determine if A should evict the assigned live range B. The
/// eviction policy defined by this function together with the allocation order
/// defined by enqueue() decides which registers ultimately end up being split
/// and spilled.
///
/// Cascade numbers are used to prevent infinite loops if this function is a
/// cyclic relation.
///
/// @param A          The live range to be assigned.
/// @param IsHint     True when A is about to be assigned to its preferred
///                   register.
/// @param B          The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
                           LiveInterval &B, bool BreaksHint) {
  bool CanSplit = getStage(B) < RS_Spill;

  // Be fairly aggressive about following hints as long as the evictee can be
  // split.
  if (CanSplit && IsHint && !BreaksHint)
    return true;

  if (A.weight > B.weight) {
    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
    return true;
  }
  return false;
}

/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    bool IsHint, EvictionCost &MaxCost) {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
  // involved in an eviction before. If a cascade number was assigned, deny
  // evicting anything with the same or a newer cascade number. This prevents
  // infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
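  // For example, when NextCascade is 4, a VirtReg that has never evicted
  // anything behaves as if it had cascade number 4: it may displace
  // interference carrying cascade numbers 0 through 3, and no live range
  // assigned so far can carry a number that high.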
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
             "Only expecting virtual register interference from query");
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
      bool Urgent = !VirtReg.isSpillable() &&
        (Intf->isSpillable() ||
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to suboptimal
      // coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  MaxCost = Cost;
  return true;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;

  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
               << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @return         Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1)
      if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
        if (!MRI->isPhysRegUsed(CSR)) {
          DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
                       << PrintReg(CSR, TRI) << '\n');
          continue;
        }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}


//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

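    // The checks below classify the interference: interference that reaches
    // the block entry (or the last split point before the exit) forces the
    // live-in (live-out) value to be spilled; interference that merely
    // overlaps the value before its first use (or after its last use) makes
    // spilling it preferable; interference between the first and last use
    // only costs one extra spill instruction.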
    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
        BC.Entry = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.first() < BI.FirstInstr)
        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.first() < BI.LastInstr)
        ++Ins;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
        BC.Exit = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.last() > BI.LastInstr)
        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.last() > BI.FirstInstr)
        ++Ins;
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

955 | /// addThroughConstraints - Add constraints and links to SpillPlacer from the | |
956 | /// live-through blocks in Blocks. | |
957 | void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, | |
958 | ArrayRef<unsigned> Blocks) { | |
959 | const unsigned GroupSize = 8; | |
960 | SpillPlacement::BlockConstraint BCS[GroupSize]; | |
961 | unsigned TBS[GroupSize]; | |
962 | unsigned B = 0, T = 0; | |
963 | ||
964 | for (unsigned i = 0; i != Blocks.size(); ++i) { | |
965 | unsigned Number = Blocks[i]; | |
966 | Intf.moveToBlock(Number); | |
967 | ||
968 | if (!Intf.hasInterference()) { | |
969 | assert(T < GroupSize && "Array overflow"); | |
970 | TBS[T] = Number; | |
971 | if (++T == GroupSize) { | |
972 | SpillPlacer->addLinks(makeArrayRef(TBS, T)); | |
973 | T = 0; | |
974 | } | |
975 | continue; | |
976 | } | |
977 | ||
978 | assert(B < GroupSize && "Array overflow"); | |
979 | BCS[B].Number = Number; | |
980 | ||
981 | // Interference for the live-in value. | |
982 | if (Intf.first() <= Indexes->getMBBStartIdx(Number)) | |
983 | BCS[B].Entry = SpillPlacement::MustSpill; | |
984 | else | |
985 | BCS[B].Entry = SpillPlacement::PrefSpill; | |
986 | ||
987 | // Interference for the live-out value. | |
988 | if (Intf.last() >= SA->getLastSplitPoint(Number)) | |
989 | BCS[B].Exit = SpillPlacement::MustSpill; | |
990 | else | |
991 | BCS[B].Exit = SpillPlacement::PrefSpill; | |
992 | ||
993 | if (++B == GroupSize) { | |
1a4d82fc | 994 | SpillPlacer->addConstraints(makeArrayRef(BCS, B)); |
223e47cc LB |
995 | B = 0; |
996 | } | |
997 | } | |
998 | ||
1a4d82fc | 999 | SpillPlacer->addConstraints(makeArrayRef(BCS, B)); |
223e47cc LB |
1000 | SpillPlacer->addLinks(makeArrayRef(TBS, T)); |
1001 | } | |
1002 | ||
1003 | void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { | |
1004 | // Keep track of through blocks that have not been added to SpillPlacer. | |
1005 | BitVector Todo = SA->getThroughBlocks(); | |
1006 | SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; | |
1007 | unsigned AddedTo = 0; | |
1008 | #ifndef NDEBUG | |
1009 | unsigned Visited = 0; | |
1010 | #endif | |
1011 | ||
1012 | for (;;) { | |
1013 | ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); | |
1014 | // Find new through blocks in the periphery of PrefRegBundles. | |
1015 | for (int i = 0, e = NewBundles.size(); i != e; ++i) { | |
1016 | unsigned Bundle = NewBundles[i]; | |
1017 | // Look at all blocks connected to Bundle in the full graph. | |
1018 | ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); | |
1019 | for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); | |
1020 | I != E; ++I) { | |
1021 | unsigned Block = *I; | |
1022 | if (!Todo.test(Block)) | |
1023 | continue; | |
1024 | Todo.reset(Block); | |
1025 | // This is a new through block. Add it to SpillPlacer later. | |
1026 | ActiveBlocks.push_back(Block); | |
1027 | #ifndef NDEBUG | |
1028 | ++Visited; | |
1029 | #endif | |
1030 | } | |
1031 | } | |
1032 | // Any new blocks to add? | |
1033 | if (ActiveBlocks.size() == AddedTo) | |
1034 | break; | |
1035 | ||
1036 | // Compute through constraints from the interference, or assume that all | |
1037 | // through blocks prefer spilling when forming compact regions. | |
1a4d82fc | 1038 | auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); |
223e47cc LB |
1039 | if (Cand.PhysReg) |
1040 | addThroughConstraints(Cand.Intf, NewBlocks); | |
1041 | else | |
1042 | // Provide a strong negative bias on through blocks to prevent unwanted | |
1043 | // liveness on loop backedges. | |
1044 | SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); | |
1045 | AddedTo = ActiveBlocks.size(); | |
1046 | ||
1047 | // Perhaps iterating can enable more bundles? | |
1048 | SpillPlacer->iterate(); | |
1049 | } | |
1050 | DEBUG(dbgs() << ", v=" << Visited); | |
1051 | } | |
1052 | ||
1053 | /// calcCompactRegion - Compute the set of edge bundles that should be live | |
1054 | /// when splitting the current live range into compact regions. Compact | |
1055 | /// regions can be computed without looking at interference. They are the | |
1056 | /// regions formed by removing all the live-through blocks from the live range. | |
1057 | /// | |
1058 | /// Returns false if the current live range is already compact, or if the | |
1059 | /// compact regions would form single block regions anyway. | |
1060 | bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { | |
1061 | // Without any through blocks, the live range is already compact. | |
1062 | if (!SA->getNumThroughBlocks()) | |
1063 | return false; | |
1064 | ||
1065 | // Compact regions don't correspond to any physreg. | |
1066 | Cand.reset(IntfCache, 0); | |
1067 | ||
1068 | DEBUG(dbgs() << "Compact region bundles"); | |
1069 | ||
1070 | // Use the spill placer to determine the live bundles. GrowRegion pretends | |
1071 | // that all the through blocks have interference when PhysReg is unset. | |
1072 | SpillPlacer->prepare(Cand.LiveBundles); | |
1073 | ||
1074 | // The static split cost will be zero since Cand.Intf reports no interference. | |
1a4d82fc | 1075 | BlockFrequency Cost; |
223e47cc LB |
1076 | if (!addSplitConstraints(Cand.Intf, Cost)) { |
1077 | DEBUG(dbgs() << ", none.\n"); | |
1078 | return false; | |
1079 | } | |
1080 | ||
1081 | growRegion(Cand); | |
1082 | SpillPlacer->finish(); | |
1083 | ||
1084 | if (!Cand.LiveBundles.any()) { | |
1085 | DEBUG(dbgs() << ", none.\n"); | |
1086 | return false; | |
1087 | } | |
1088 | ||
1089 | DEBUG({ | |
1090 | for (int i = Cand.LiveBundles.find_first(); i>=0; | |
1091 | i = Cand.LiveBundles.find_next(i)) | |
1092 | dbgs() << " EB#" << i; | |
1093 | dbgs() << ".\n"; | |
1094 | }); | |
1095 | return true; | |
1096 | } | |
1097 | ||
1098 | /// calcSpillCost - Compute how expensive it would be to split the live range in | |
1099 | /// SA around all use blocks instead of forming bundle regions. | |
1a4d82fc JJ |
1100 | BlockFrequency RAGreedy::calcSpillCost() { |
1101 | BlockFrequency Cost = 0; | |
223e47cc LB |
1102 | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
1103 | for (unsigned i = 0; i != UseBlocks.size(); ++i) { | |
1104 | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; | |
1105 | unsigned Number = BI.MBB->getNumber(); | |
1106 | // We normally only need one spill instruction - a load or a store. | |
1107 | Cost += SpillPlacer->getBlockFrequency(Number); | |
1108 | ||
1109 | // Unless the value is redefined in the block. | |
1110 | if (BI.LiveIn && BI.LiveOut && BI.FirstDef) | |
1111 | Cost += SpillPlacer->getBlockFrequency(Number); | |
1112 | } | |
1113 | return Cost; | |
1114 | } | |
1115 | ||
1116 | /// calcGlobalSplitCost - Return the global split cost of following the split | |
1117 | /// pattern in LiveBundles. This cost should be added to the local cost of the | |
1118 | /// interference pattern in SplitConstraints. | |
1119 | /// | |
1a4d82fc JJ |
1120 | BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { |
1121 | BlockFrequency GlobalCost = 0; | |
223e47cc LB |
1122 | const BitVector &LiveBundles = Cand.LiveBundles; |
1123 | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); | |
1124 | for (unsigned i = 0; i != UseBlocks.size(); ++i) { | |
1125 | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; | |
1126 | SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; | |
1127 | bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; | |
1128 | bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; | |
1129 | unsigned Ins = 0; | |
1130 | ||
1131 | if (BI.LiveIn) | |
1132 | Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); | |
1133 | if (BI.LiveOut) | |
1134 | Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); | |
1a4d82fc JJ |
1135 | while (Ins--) |
1136 | GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); | |
223e47cc LB |
1137 | } |
1138 | ||
1139 | for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { | |
1140 | unsigned Number = Cand.ActiveBlocks[i]; | |
1141 | bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; | |
1142 | bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; | |
1143 | if (!RegIn && !RegOut) | |
1144 | continue; | |
1145 | if (RegIn && RegOut) { | |
1146 | // We need double spill code if this block has interference. | |
1147 | Cand.Intf.moveToBlock(Number); | |
1a4d82fc JJ |
1148 | if (Cand.Intf.hasInterference()) { |
1149 | GlobalCost += SpillPlacer->getBlockFrequency(Number); | |
1150 | GlobalCost += SpillPlacer->getBlockFrequency(Number); | |
1151 | } | |
223e47cc LB |
1152 | continue; |
1153 | } | |
1154 | // live-in / stack-out or stack-in live-out. | |
1155 | GlobalCost += SpillPlacer->getBlockFrequency(Number); | |
1156 | } | |
1157 | return GlobalCost; | |
1158 | } | |
1159 | ||
1160 | /// splitAroundRegion - Split the current live range around the regions | |
1161 | /// determined by BundleCand and GlobalCand. | |
1162 | /// | |
1163 | /// Before calling this function, GlobalCand and BundleCand must be initialized | |
1164 | /// so each bundle is assigned to a valid candidate, or NoCand for the | |
1165 | /// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor | |
1166 | /// objects must be initialized for the current live range, and intervals | |
1167 | /// created for the used candidates. | |
1168 | /// | |
1169 | /// @param LREdit The LiveRangeEdit object handling the current split. | |
1170 | /// @param UsedCands List of used GlobalCand entries. Every BundleCand value | |
1171 | /// must appear in this list. | |
1172 | void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, | |
1173 | ArrayRef<unsigned> UsedCands) { | |
1174 | // These are the intervals created for new global ranges. We may create more | |
1175 | // intervals for local ranges. | |
1176 | const unsigned NumGlobalIntvs = LREdit.size(); | |
1177 | DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); | |
1178 | assert(NumGlobalIntvs && "No global intervals configured"); | |
1179 | ||
1180 | // Isolate even single instructions when dealing with a proper sub-class. | |
1181 | // That guarantees register class inflation for the stack interval because it | |
1182 | // is all copies. | |
1183 | unsigned Reg = SA->getParent().reg; | |
1184 | bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); | |
1185 | ||
1186 | // First handle all the blocks with uses. | |
1187 | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); | |
1188 | for (unsigned i = 0; i != UseBlocks.size(); ++i) { | |
1189 | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; | |
1190 | unsigned Number = BI.MBB->getNumber(); | |
1191 | unsigned IntvIn = 0, IntvOut = 0; | |
1192 | SlotIndex IntfIn, IntfOut; | |
1193 | if (BI.LiveIn) { | |
1194 | unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; | |
1195 | if (CandIn != NoCand) { | |
1196 | GlobalSplitCandidate &Cand = GlobalCand[CandIn]; | |
1197 | IntvIn = Cand.IntvIdx; | |
1198 | Cand.Intf.moveToBlock(Number); | |
1199 | IntfIn = Cand.Intf.first(); | |
1200 | } | |
1201 | } | |
1202 | if (BI.LiveOut) { | |
1203 | unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; | |
1204 | if (CandOut != NoCand) { | |
1205 | GlobalSplitCandidate &Cand = GlobalCand[CandOut]; | |
1206 | IntvOut = Cand.IntvIdx; | |
1207 | Cand.Intf.moveToBlock(Number); | |
1208 | IntfOut = Cand.Intf.last(); | |
1209 | } | |
1210 | } | |
1211 | ||
1212 | // Create separate intervals for isolated blocks with multiple uses. | |
1213 | if (!IntvIn && !IntvOut) { | |
1214 | DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); | |
1215 | if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) | |
1216 | SE->splitSingleBlock(BI); | |
1217 | continue; | |
1218 | } | |
1219 | ||
1220 | if (IntvIn && IntvOut) | |
1221 | SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); | |
1222 | else if (IntvIn) | |
1223 | SE->splitRegInBlock(BI, IntvIn, IntfIn); | |
1224 | else | |
1225 | SE->splitRegOutBlock(BI, IntvOut, IntfOut); | |
1226 | } | |
1227 | ||
1228 | // Handle live-through blocks. The relevant live-through blocks are stored in | |
1229 | // the ActiveBlocks list with each candidate. We need to filter out | |
1230 | // duplicates. | |
1231 | BitVector Todo = SA->getThroughBlocks(); | |
1232 | for (unsigned c = 0; c != UsedCands.size(); ++c) { | |
1233 | ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; | |
1234 | for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { | |
1235 | unsigned Number = Blocks[i]; | |
1236 | if (!Todo.test(Number)) | |
1237 | continue; | |
1238 | Todo.reset(Number); | |
1239 | ||
1240 | unsigned IntvIn = 0, IntvOut = 0; | |
1241 | SlotIndex IntfIn, IntfOut; | |
1242 | ||
1243 | unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; | |
1244 | if (CandIn != NoCand) { | |
1245 | GlobalSplitCandidate &Cand = GlobalCand[CandIn]; | |
1246 | IntvIn = Cand.IntvIdx; | |
1247 | Cand.Intf.moveToBlock(Number); | |
1248 | IntfIn = Cand.Intf.first(); | |
1249 | } | |
1250 | ||
1251 | unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; | |
1252 | if (CandOut != NoCand) { | |
1253 | GlobalSplitCandidate &Cand = GlobalCand[CandOut]; | |
1254 | IntvOut = Cand.IntvIdx; | |
1255 | Cand.Intf.moveToBlock(Number); | |
1256 | IntfOut = Cand.Intf.last(); | |
1257 | } | |
1258 | if (!IntvIn && !IntvOut) | |
1259 | continue; | |
1260 | SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); | |
1261 | } | |
1262 | } | |
1263 | ||
1264 | ++NumGlobalSplits; | |
1265 | ||
1266 | SmallVector<unsigned, 8> IntvMap; | |
1267 | SE->finish(&IntvMap); | |
1a4d82fc | 1268 | DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); |
223e47cc LB |
1269 | |
1270 | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | |
1271 | unsigned OrigBlocks = SA->getNumLiveBlocks(); | |
1272 | ||
1273 | // Sort out the new intervals created by splitting. We get four kinds: | |
1274 | // - Remainder intervals should not be split again. | |
1275 | // - Candidate intervals can be assigned to Cand.PhysReg. | |
1276 | // - Block-local splits are candidates for local splitting. | |
1277 | // - DCE leftovers should go back on the queue. | |
1278 | for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { | |
1a4d82fc | 1279 | LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); |
223e47cc LB |
1280 | |
1281 | // Ignore old intervals from DCE. | |
1282 | if (getStage(Reg) != RS_New) | |
1283 | continue; | |
1284 | ||
1285 | // Remainder interval. Don't try splitting again; spill if it doesn't | |
1286 | // allocate. | |
1287 | if (IntvMap[i] == 0) { | |
1288 | setStage(Reg, RS_Spill); | |
1289 | continue; | |
1290 | } | |
1291 | ||
1292 | // Global intervals. Allow repeated splitting as long as the number of live | |
1293 | // blocks is strictly decreasing. | |
1294 | if (IntvMap[i] < NumGlobalIntvs) { | |
1295 | if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { | |
1296 | DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks | |
1297 | << " blocks as original.\n"); | |
1298 | // Don't allow repeated splitting as a safeguard against looping. | |
1299 | setStage(Reg, RS_Split2); | |
1300 | } | |
1301 | continue; | |
1302 | } | |
1303 | ||
1304 | // Other intervals are treated as new. This includes local intervals created | |
1305 | // for blocks with multiple uses, and anything created by DCE. | |
1306 | } | |
1307 | ||
1308 | if (VerifyEnabled) | |
1309 | MF->verify(this, "After splitting live range around region"); | |
1310 | } | |
1311 | ||
1312 | unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, | |
1a4d82fc | 1313 | SmallVectorImpl<unsigned> &NewVRegs) { |
223e47cc | 1314 | unsigned NumCands = 0; |
1a4d82fc | 1315 | BlockFrequency BestCost; |
223e47cc LB |
1316 | |
1317 | // Check if we can split this live range around a compact region. | |
1318 | bool HasCompact = calcCompactRegion(GlobalCand.front()); | |
1319 | if (HasCompact) { | |
1320 | // Yes, keep GlobalCand[0] as the compact region candidate. | |
1321 | NumCands = 1; | |
1a4d82fc | 1322 | BestCost = BlockFrequency::getMaxFrequency(); |
223e47cc LB |
1323 | } else { |
1324 | // No benefit from the compact region; our fallback will be per-block | |
1325 | // splitting. Make sure we find a solution that is cheaper than spilling. | |
1a4d82fc JJ |
1326 | BestCost = calcSpillCost(); |
1327 | DEBUG(dbgs() << "Cost of isolating all blocks = "; | |
1328 | MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); | |
223e47cc LB |
1329 | } |
1330 | ||
1a4d82fc JJ |
1331 | unsigned BestCand = |
1332 | calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, | |
1333 | false/*IgnoreCSR*/); | |
1334 | ||
1335 | // No solutions found, fall back to single block splitting. | |
1336 | if (!HasCompact && BestCand == NoCand) | |
1337 | return 0; | |
1338 | ||
1339 | return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); | |
1340 | } | |
1341 | ||
1342 | unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, | |
1343 | AllocationOrder &Order, | |
1344 | BlockFrequency &BestCost, | |
1345 | unsigned &NumCands, | |
1346 | bool IgnoreCSR) { | |
1347 | unsigned BestCand = NoCand; | |
223e47cc LB |
1348 | Order.rewind(); |
1349 | while (unsigned PhysReg = Order.next()) { | |
1a4d82fc JJ |
1350 | if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) |
1351 | if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) | |
1352 | continue; | |
1353 | ||
223e47cc LB |
1354 | // Discard bad candidates before we run out of interference cache cursors. |
1355 | // This will only affect register classes with a lot of registers (>32). | |
1356 | if (NumCands == IntfCache.getMaxCursors()) { | |
1357 | unsigned WorstCount = ~0u; | |
1358 | unsigned Worst = 0; | |
1359 | for (unsigned i = 0; i != NumCands; ++i) { | |
1360 | if (i == BestCand || !GlobalCand[i].PhysReg) | |
1361 | continue; | |
1362 | unsigned Count = GlobalCand[i].LiveBundles.count(); | |
1363 | if (Count < WorstCount) | |
1364 | Worst = i, WorstCount = Count; | |
1365 | } | |
1366 | --NumCands; | |
1367 | GlobalCand[Worst] = GlobalCand[NumCands]; | |
1368 | if (BestCand == NumCands) | |
1369 | BestCand = Worst; | |
1370 | } | |
1371 | ||
1372 | if (GlobalCand.size() <= NumCands) | |
1373 | GlobalCand.resize(NumCands+1); | |
1374 | GlobalSplitCandidate &Cand = GlobalCand[NumCands]; | |
1375 | Cand.reset(IntfCache, PhysReg); | |
1376 | ||
1377 | SpillPlacer->prepare(Cand.LiveBundles); | |
1a4d82fc | 1378 | BlockFrequency Cost; |
223e47cc LB |
1379 | if (!addSplitConstraints(Cand.Intf, Cost)) { |
1380 | DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); | |
1381 | continue; | |
1382 | } | |
1a4d82fc JJ |
1383 | DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; |
1384 | MBFI->printBlockFreq(dbgs(), Cost)); | |
223e47cc LB |
1385 | if (Cost >= BestCost) { |
1386 | DEBUG({ | |
1387 | if (BestCand == NoCand) | |
1388 | dbgs() << " worse than no bundles\n"; | |
1389 | else | |
1390 | dbgs() << " worse than " | |
1391 | << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; | |
1392 | }); | |
1393 | continue; | |
1394 | } | |
1395 | growRegion(Cand); | |
1396 | ||
1397 | SpillPlacer->finish(); | |
1398 | ||
1399 | // No live bundles, defer to splitSingleBlocks(). | |
1400 | if (!Cand.LiveBundles.any()) { | |
1401 | DEBUG(dbgs() << " no bundles.\n"); | |
1402 | continue; | |
1403 | } | |
1404 | ||
1405 | Cost += calcGlobalSplitCost(Cand); | |
1406 | DEBUG({ | |
1a4d82fc JJ |
1407 | dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) |
1408 | << " with bundles"; | |
223e47cc LB |
1409 | for (int i = Cand.LiveBundles.find_first(); i>=0; |
1410 | i = Cand.LiveBundles.find_next(i)) | |
1411 | dbgs() << " EB#" << i; | |
1412 | dbgs() << ".\n"; | |
1413 | }); | |
1414 | if (Cost < BestCost) { | |
1415 | BestCand = NumCands; | |
1a4d82fc | 1416 | BestCost = Cost; |
223e47cc LB |
1417 | } |
1418 | ++NumCands; | |
1419 | } | |
1a4d82fc JJ |
1420 | return BestCand; |
1421 | } | |
223e47cc | 1422 | |
1a4d82fc JJ |
1423 | unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, |
1424 | bool HasCompact, | |
1425 | SmallVectorImpl<unsigned> &NewVRegs) { | |
1426 | SmallVector<unsigned, 8> UsedCands; | |
223e47cc LB |
1427 | // Prepare split editor. |
1428 | LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); | |
1429 | SE->reset(LREdit, SplitSpillMode); | |
1430 | ||
1431 | // Assign all edge bundles to the preferred candidate, or NoCand. | |
1432 | BundleCand.assign(Bundles->getNumBundles(), NoCand); | |
1433 | ||
1434 | // Assign bundles for the best candidate region. | |
1435 | if (BestCand != NoCand) { | |
1436 | GlobalSplitCandidate &Cand = GlobalCand[BestCand]; | |
1437 | if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { | |
1438 | UsedCands.push_back(BestCand); | |
1439 | Cand.IntvIdx = SE->openIntv(); | |
1440 | DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " | |
1441 | << B << " bundles, intv " << Cand.IntvIdx << ".\n"); | |
1442 | (void)B; | |
1443 | } | |
1444 | } | |
1445 | ||
1446 | // Assign bundles for the compact region. | |
1447 | if (HasCompact) { | |
1448 | GlobalSplitCandidate &Cand = GlobalCand.front(); | |
1449 | assert(!Cand.PhysReg && "Compact region has no physreg"); | |
1450 | if (unsigned B = Cand.getBundles(BundleCand, 0)) { | |
1451 | UsedCands.push_back(0); | |
1452 | Cand.IntvIdx = SE->openIntv(); | |
1453 | DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " | |
1454 | << Cand.IntvIdx << ".\n"); | |
1455 | (void)B; | |
1456 | } | |
1457 | } | |
1458 | ||
1459 | splitAroundRegion(LREdit, UsedCands); | |
1460 | return 0; | |
1461 | } | |
1462 | ||
1463 | ||
1464 | //===----------------------------------------------------------------------===// | |
1465 | // Per-Block Splitting | |
1466 | //===----------------------------------------------------------------------===// | |
1467 | ||
1468 | /// tryBlockSplit - Split a global live range around every block with uses. This | |
1469 | /// creates many local live ranges that will be split by tryLocalSplit if | |
1470 | /// they don't allocate. | |
1471 | unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, | |
1a4d82fc | 1472 | SmallVectorImpl<unsigned> &NewVRegs) { |
223e47cc LB |
1473 | assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); |
1474 | unsigned Reg = VirtReg.reg; | |
1475 | bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); | |
1476 | LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); | |
1477 | SE->reset(LREdit, SplitSpillMode); | |
1478 | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); | |
1479 | for (unsigned i = 0; i != UseBlocks.size(); ++i) { | |
1480 | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; | |
1481 | if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) | |
1482 | SE->splitSingleBlock(BI); | |
1483 | } | |
1484 | // No blocks were split. | |
1485 | if (LREdit.empty()) | |
1486 | return 0; | |
1487 | ||
1488 | // We did split for some blocks. | |
1489 | SmallVector<unsigned, 8> IntvMap; | |
1490 | SE->finish(&IntvMap); | |
1491 | ||
1492 | // Tell LiveDebugVariables about the new ranges. | |
1a4d82fc | 1493 | DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); |
223e47cc LB |
1494 | |
1495 | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | |
1496 | ||
1497 | // Sort out the new intervals created by splitting. The remainder interval | |
1498 | // goes straight to spilling; the new local ranges get to stay RS_New. | |
1499 | for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { | |
1a4d82fc | 1500 | LiveInterval &LI = LIS->getInterval(LREdit.get(i)); |
223e47cc LB |
1501 | if (getStage(LI) == RS_New && IntvMap[i] == 0) |
1502 | setStage(LI, RS_Spill); | |
1503 | } | |
1504 | ||
1505 | if (VerifyEnabled) | |
1506 | MF->verify(this, "After splitting live range around basic blocks"); | |
1507 | return 0; | |
1508 | } | |
1509 | ||
1510 | ||
1511 | //===----------------------------------------------------------------------===// | |
1512 | // Per-Instruction Splitting | |
1513 | //===----------------------------------------------------------------------===// | |
1514 | ||
1a4d82fc JJ |
1515 | /// Get the number of allocatable registers that match the constraints of \p Reg |
1516 | /// on \p MI and that are also in \p SuperRC. | |
1517 | static unsigned getNumAllocatableRegsForConstraints( | |
1518 | const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, | |
1519 | const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, | |
1520 | const RegisterClassInfo &RCI) { | |
1521 | assert(SuperRC && "Invalid register class"); | |
1522 | ||
1523 | const TargetRegisterClass *ConstrainedRC = | |
1524 | MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, | |
1525 | /* ExploreBundle */ true); | |
1526 | if (!ConstrainedRC) | |
1527 | return 0; | |
1528 | return RCI.getNumAllocatableRegs(ConstrainedRC); | |
1529 | } | |
1530 | ||
223e47cc LB |
1531 | /// tryInstructionSplit - Split a live range around individual instructions. |
1532 | /// This is normally not worthwhile since the spiller is doing essentially the | |
1533 | /// same thing. However, when the live range is in a constrained register | |
1534 | /// class, it may help to insert copies such that parts of the live range can | |
1535 | /// be moved to a larger register class. | |
1536 | /// | |
1537 | /// This is similar to spilling to a larger register class. | |
1538 | unsigned | |
1539 | RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, | |
1a4d82fc JJ |
1540 | SmallVectorImpl<unsigned> &NewVRegs) { |
1541 | const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); | |
223e47cc | 1542 | // There is no point to this if there are no larger sub-classes. |
1a4d82fc | 1543 | if (!RegClassInfo.isProperSubClass(CurRC)) |
223e47cc LB |
1544 | return 0; |
1545 | ||
1546 | // Always enable split spill mode, since we're effectively spilling to a | |
1547 | // register. | |
1548 | LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); | |
1549 | SE->reset(LREdit, SplitEditor::SM_Size); | |
1550 | ||
1551 | ArrayRef<SlotIndex> Uses = SA->getUseSlots(); | |
1552 | if (Uses.size() <= 1) | |
1553 | return 0; | |
1554 | ||
1555 | DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); | |
1556 | ||
1a4d82fc JJ |
1557 | const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); |
1558 | unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); | |
1559 | // Split around every non-copy instruction if this split will relax | |
1560 | // the constraints on the virtual register. | |
1561 | // Otherwise, splitting just inserts uncoalescable copies that do not help | |
1562 | // the allocation. | |
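// Illustrative example (hypothetical classes, not tied to a specific target):
// if VirtReg sits in a small sub-class such as a GR32_ABCD-like class only
// because a single use requires it, isolating that use into its own tiny
// interval lets the rest of the range live in the larger GR32-like
// super-class, where more registers are allocatable.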
223e47cc LB |
1563 | for (unsigned i = 0; i != Uses.size(); ++i) { |
1564 | if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) | |
1a4d82fc JJ |
1565 | if (MI->isFullCopy() || |
1566 | SuperRCNumAllocatableRegs == | |
1567 | getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, | |
1568 | TRI, RCI)) { | |
223e47cc LB |
1569 | DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); |
1570 | continue; | |
1571 | } | |
1572 | SE->openIntv(); | |
1573 | SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); | |
1574 | SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); | |
1575 | SE->useIntv(SegStart, SegStop); | |
1576 | } | |
1577 | ||
1578 | if (LREdit.empty()) { | |
1579 | DEBUG(dbgs() << "All uses were copies.\n"); | |
1580 | return 0; | |
1581 | } | |
1582 | ||
1583 | SmallVector<unsigned, 8> IntvMap; | |
1584 | SE->finish(&IntvMap); | |
1a4d82fc | 1585 | DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); |
223e47cc LB |
1586 | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
1587 | ||
1588 | // Assign all new registers to RS_Spill. This was the last chance. | |
1589 | setStage(LREdit.begin(), LREdit.end(), RS_Spill); | |
1590 | return 0; | |
1591 | } | |
1592 | ||
1593 | ||
1594 | //===----------------------------------------------------------------------===// | |
1595 | // Local Splitting | |
1596 | //===----------------------------------------------------------------------===// | |
1597 | ||
1598 | ||
1599 | /// calcGapWeights - Compute the maximum spill weight that needs to be evicted | |
1600 | /// in order to use PhysReg between two entries in SA->UseSlots. | |
1601 | /// | |
1602 | /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. | |
1603 | /// | |
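/// For example (illustrative): with UseSlots = {A, B, C} there are two gaps,
/// A-B and B-C. An interfering live range of weight 2.0 that overlaps the
/// instruction at B raises both GapWeight[0] and GapWeight[1] to at least 2.0,
/// since interference over an instruction counts in both surrounding gaps.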
1604 | void RAGreedy::calcGapWeights(unsigned PhysReg, | |
1605 | SmallVectorImpl<float> &GapWeight) { | |
1606 | assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); | |
1607 | const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); | |
1608 | ArrayRef<SlotIndex> Uses = SA->getUseSlots(); | |
1609 | const unsigned NumGaps = Uses.size()-1; | |
1610 | ||
1611 | // Start and end points for the interference check. | |
1612 | SlotIndex StartIdx = | |
1613 | BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; | |
1614 | SlotIndex StopIdx = | |
1615 | BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; | |
1616 | ||
1617 | GapWeight.assign(NumGaps, 0.0f); | |
1618 | ||
1619 | // Add interference from each overlapping register. | |
1620 | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { | |
1621 | if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) | |
1622 | .checkInterference()) | |
1623 | continue; | |
1624 | ||
1625 | // We know that VirtReg is a continuous interval from FirstInstr to | |
1626 | // LastInstr, so we don't need InterferenceQuery. | |
1627 | // | |
1628 | // Interference that overlaps an instruction is counted in both gaps | |
1629 | // surrounding the instruction. The exception is interference before | |
1630 | // StartIdx and after StopIdx. | |
1631 | // | |
1632 | LiveIntervalUnion::SegmentIter IntI = | |
1633 | Matrix->getLiveUnions()[*Units] .find(StartIdx); | |
1634 | for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { | |
1635 | // Skip the gaps before IntI. | |
1636 | while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) | |
1637 | if (++Gap == NumGaps) | |
1638 | break; | |
1639 | if (Gap == NumGaps) | |
1640 | break; | |
1641 | ||
1642 | // Update the gaps covered by IntI. | |
1643 | const float weight = IntI.value()->weight; | |
1644 | for (; Gap != NumGaps; ++Gap) { | |
1645 | GapWeight[Gap] = std::max(GapWeight[Gap], weight); | |
1646 | if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) | |
1647 | break; | |
1648 | } | |
1649 | if (Gap == NumGaps) | |
1650 | break; | |
1651 | } | |
1652 | } | |
1653 | ||
1654 | // Add fixed interference. | |
1655 | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { | |
1a4d82fc JJ |
1656 | const LiveRange &LR = LIS->getRegUnit(*Units); |
1657 | LiveRange::const_iterator I = LR.find(StartIdx); | |
1658 | LiveRange::const_iterator E = LR.end(); | |
223e47cc LB |
1659 | |
1660 | // Same loop as above. Mark any overlapped gaps as HUGE_VALF. | |
1661 | for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { | |
1662 | while (Uses[Gap+1].getBoundaryIndex() < I->start) | |
1663 | if (++Gap == NumGaps) | |
1664 | break; | |
1665 | if (Gap == NumGaps) | |
1666 | break; | |
1667 | ||
1668 | for (; Gap != NumGaps; ++Gap) { | |
1a4d82fc | 1669 | GapWeight[Gap] = llvm::huge_valf; |
223e47cc LB |
1670 | if (Uses[Gap+1].getBaseIndex() >= I->end) |
1671 | break; | |
1672 | } | |
1673 | if (Gap == NumGaps) | |
1674 | break; | |
1675 | } | |
1676 | } | |
1677 | } | |
1678 | ||
1679 | /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only | |
1680 | /// basic block. | |
1681 | /// | |
1682 | unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, | |
1a4d82fc | 1683 | SmallVectorImpl<unsigned> &NewVRegs) { |
223e47cc LB |
1684 | assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); |
1685 | const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); | |
1686 | ||
1687 | // Note that it is possible to have an interval that is live-in or live-out | |
1688 | // while only covering a single block - a phi-def can use undef values from | |
1689 | // predecessors, and the block could be a single-block loop. | |
1690 | // We don't bother doing anything clever about such a case; we simply assume | |
1691 | // that the interval is continuous from FirstInstr to LastInstr. We should | |
1692 | // make sure that we don't do anything illegal to such an interval, though. | |
1693 | ||
1694 | ArrayRef<SlotIndex> Uses = SA->getUseSlots(); | |
1695 | if (Uses.size() <= 2) | |
1696 | return 0; | |
1697 | const unsigned NumGaps = Uses.size()-1; | |
1698 | ||
1699 | DEBUG({ | |
1700 | dbgs() << "tryLocalSplit: "; | |
1701 | for (unsigned i = 0, e = Uses.size(); i != e; ++i) | |
1702 | dbgs() << ' ' << Uses[i]; | |
1703 | dbgs() << '\n'; | |
1704 | }); | |
1705 | ||
1706 | // If VirtReg is live across any register mask operands, compute a list of | |
1707 | // gaps with register masks. | |
1708 | SmallVector<unsigned, 8> RegMaskGaps; | |
1709 | if (Matrix->checkRegMaskInterference(VirtReg)) { | |
1710 | // Get regmask slots for the whole block. | |
1711 | ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); | |
1712 | DEBUG(dbgs() << RMS.size() << " regmasks in block:"); | |
1713 | // Constrain to VirtReg's live range. | |
1714 | unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), | |
1715 | Uses.front().getRegSlot()) - RMS.begin(); | |
1716 | unsigned re = RMS.size(); | |
1717 | for (unsigned i = 0; i != NumGaps && ri != re; ++i) { | |
1718 | // Look for Uses[i] <= RMS <= Uses[i+1]. | |
1719 | assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); | |
1720 | if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) | |
1721 | continue; | |
1722 | // Skip a regmask on the same instruction as the last use. It doesn't | |
1723 | // overlap the live range. | |
1724 | if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) | |
1725 | break; | |
1726 | DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); | |
1727 | RegMaskGaps.push_back(i); | |
1728 | // Advance ri to the next gap. A regmask on one of the uses counts in | |
1729 | // both gaps. | |
1730 | while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) | |
1731 | ++ri; | |
1732 | } | |
1733 | DEBUG(dbgs() << '\n'); | |
1734 | } | |
1735 | ||
1736 | // Since we allow local split results to be split again, there is a risk of | |
1737 | // creating infinite loops. It is tempting to require that the new live | |
1738 | // ranges have less instructions than the original. That would guarantee | |
1739 | // convergence, but it is too strict. A live range with 3 instructions can be | |
1740 | // split 2+3 (including the COPY), and we want to allow that. | |
1741 | // | |
1742 | // Instead we use these rules: | |
1743 | // | |
1744 | // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the | |
1745 | // noop split, of course). | |
1746 | // 2. Require progress be made for ranges with getStage() == RS_Split2. All | |
1747 | // the new ranges must have fewer instructions than before the split. | |
1748 | // 3. New ranges with the same number of instructions are marked RS_Split2, | |
1749 | // smaller ranges are marked RS_New. | |
1750 | // | |
1751 | // These rules allow a 3 -> 2+3 split once, which we need. They also prevent | |
1752 | // excessive splitting and infinite loops. | |
1753 | // | |
1754 | bool ProgressRequired = getStage(VirtReg) >= RS_Split2; | |
1755 | ||
1756 | // Best split candidate. | |
1757 | unsigned BestBefore = NumGaps; | |
1758 | unsigned BestAfter = 0; | |
1759 | float BestDiff = 0; | |
1760 | ||
1a4d82fc JJ |
1761 | const float blockFreq = |
1762 | SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * | |
1763 | (1.0f / MBFI->getEntryFreq()); | |
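// blockFreq is the block's execution frequency normalized so the entry block
// is 1.0; e.g. (illustrative) a block executed about four times per function
// invocation gives roughly 4.0.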
223e47cc LB |
1764 | SmallVector<float, 8> GapWeight; |
1765 | ||
1766 | Order.rewind(); | |
1767 | while (unsigned PhysReg = Order.next()) { | |
1768 | // Keep track of the largest spill weight that would need to be evicted in | |
1769 | // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. | |
1770 | calcGapWeights(PhysReg, GapWeight); | |
1771 | ||
1772 | // Remove any gaps with regmask clobbers. | |
1773 | if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) | |
1774 | for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) | |
1a4d82fc | 1775 | GapWeight[RegMaskGaps[i]] = llvm::huge_valf; |
223e47cc LB |
1776 | |
1777 | // Try to find the best sequence of gaps to close. | |
1778 | // The new spill weight must be larger than any gap interference. | |
1779 | ||
1780 | // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. | |
1781 | unsigned SplitBefore = 0, SplitAfter = 1; | |
1782 | ||
1783 | // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). | |
1784 | // It is the spill weight that needs to be evicted. | |
1785 | float MaxGap = GapWeight[0]; | |
1786 | ||
1787 | for (;;) { | |
1788 | // Live before/after split? | |
1789 | const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; | |
1790 | const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; | |
1791 | ||
1792 | DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' | |
1793 | << Uses[SplitBefore] << '-' << Uses[SplitAfter] | |
1794 | << " i=" << MaxGap); | |
1795 | ||
1796 | // Stop before the interval gets so big we wouldn't be making progress. | |
1797 | if (!LiveBefore && !LiveAfter) { | |
1798 | DEBUG(dbgs() << " all\n"); | |
1799 | break; | |
1800 | } | |
1801 | // Should the interval be extended or shrunk? | |
1802 | bool Shrink = true; | |
1803 | ||
1804 | // How many gaps would the new range have? | |
1805 | unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; | |
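// Worked example (illustrative): with NumGaps = 3, SplitBefore = 1,
// SplitAfter = 2, and the range live-in and live-out, NewGaps is
// 1 + 2 - 1 + 1 = 3, which is not smaller than NumGaps, so such a split is
// only legal while progress is not yet required.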
1806 | ||
1807 | // Legally, without causing looping? | |
1808 | bool Legal = !ProgressRequired || NewGaps < NumGaps; | |
1809 | ||
1a4d82fc | 1810 | if (Legal && MaxGap < llvm::huge_valf) { |
223e47cc LB |
1811 | // Estimate the new spill weight. Each instruction reads or writes the |
1812 | // register. Conservatively assume there are no read-modify-write | |
1813 | // instructions. | |
1814 | // | |
1815 | // Try to guess the size of the new interval. | |
85aaf69f SL |
1816 | const float EstWeight = normalizeSpillWeight( |
1817 | blockFreq * (NewGaps + 1), | |
1818 | Uses[SplitBefore].distance(Uses[SplitAfter]) + | |
1819 | (LiveBefore + LiveAfter) * SlotIndex::InstrDist, | |
1820 | 1); | |
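// Roughly: EstWeight is the estimated use/def frequency of the would-be
// interval, blockFreq * (NewGaps + 1), normalized by its estimated size in
// slot-index units. The Hysteresis factor below adds a small margin so
// near-equal weights do not make the decision flip back and forth.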
223e47cc LB |
1821 | // Would this split be possible to allocate? |
1822 | // Never allocate all gaps, we wouldn't be making progress. | |
1823 | DEBUG(dbgs() << " w=" << EstWeight); | |
1824 | if (EstWeight * Hysteresis >= MaxGap) { | |
1825 | Shrink = false; | |
1826 | float Diff = EstWeight - MaxGap; | |
1827 | if (Diff > BestDiff) { | |
1828 | DEBUG(dbgs() << " (best)"); | |
1829 | BestDiff = Hysteresis * Diff; | |
1830 | BestBefore = SplitBefore; | |
1831 | BestAfter = SplitAfter; | |
1832 | } | |
1833 | } | |
1834 | } | |
1835 | ||
1836 | // Try to shrink. | |
1837 | if (Shrink) { | |
1838 | if (++SplitBefore < SplitAfter) { | |
1839 | DEBUG(dbgs() << " shrink\n"); | |
1840 | // Recompute the max when necessary. | |
1841 | if (GapWeight[SplitBefore - 1] >= MaxGap) { | |
1842 | MaxGap = GapWeight[SplitBefore]; | |
1843 | for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) | |
1844 | MaxGap = std::max(MaxGap, GapWeight[i]); | |
1845 | } | |
1846 | continue; | |
1847 | } | |
1848 | MaxGap = 0; | |
1849 | } | |
1850 | ||
1851 | // Try to extend the interval. | |
1852 | if (SplitAfter >= NumGaps) { | |
1853 | DEBUG(dbgs() << " end\n"); | |
1854 | break; | |
1855 | } | |
1856 | ||
1857 | DEBUG(dbgs() << " extend\n"); | |
1858 | MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); | |
1859 | } | |
1860 | } | |
1861 | ||
1862 | // Didn't find any candidates? | |
1863 | if (BestBefore == NumGaps) | |
1864 | return 0; | |
1865 | ||
1866 | DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] | |
1867 | << '-' << Uses[BestAfter] << ", " << BestDiff | |
1868 | << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); | |
1869 | ||
1870 | LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); | |
1871 | SE->reset(LREdit); | |
1872 | ||
1873 | SE->openIntv(); | |
1874 | SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); | |
1875 | SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); | |
1876 | SE->useIntv(SegStart, SegStop); | |
1877 | SmallVector<unsigned, 8> IntvMap; | |
1878 | SE->finish(&IntvMap); | |
1a4d82fc | 1879 | DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); |
223e47cc LB |
1880 | |
1881 | // If the new range has the same number of instructions as before, mark it as | |
1882 | // RS_Split2 so the next split will be forced to make progress. Otherwise, | |
1883 | // leave the new intervals as RS_New so they can compete. | |
1884 | bool LiveBefore = BestBefore != 0 || BI.LiveIn; | |
1885 | bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; | |
1886 | unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; | |
1887 | if (NewGaps >= NumGaps) { | |
1888 | DEBUG(dbgs() << "Tagging non-progress ranges: "); | |
1889 | assert(!ProgressRequired && "Didn't make progress when it was required."); | |
1890 | for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) | |
1891 | if (IntvMap[i] == 1) { | |
1a4d82fc JJ |
1892 | setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); |
1893 | DEBUG(dbgs() << PrintReg(LREdit.get(i))); | |
223e47cc LB |
1894 | } |
1895 | DEBUG(dbgs() << '\n'); | |
1896 | } | |
1897 | ++NumLocalSplits; | |
1898 | ||
1899 | return 0; | |
1900 | } | |
1901 | ||
1902 | //===----------------------------------------------------------------------===// | |
1903 | // Live Range Splitting | |
1904 | //===----------------------------------------------------------------------===// | |
1905 | ||
1906 | /// trySplit - Try to split VirtReg or one of its interferences, making it | |
1907 | /// assignable. | |
1908 | /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. | |
1909 | unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, | |
1a4d82fc | 1910 | SmallVectorImpl<unsigned>&NewVRegs) { |
223e47cc LB |
1911 | // Ranges must be Split2 or less. |
1912 | if (getStage(VirtReg) >= RS_Spill) | |
1913 | return 0; | |
1914 | ||
1915 | // Local intervals are handled separately. | |
1916 | if (LIS->intervalIsInOneMBB(VirtReg)) { | |
1917 | NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); | |
1918 | SA->analyze(&VirtReg); | |
1919 | unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); | |
1920 | if (PhysReg || !NewVRegs.empty()) | |
1921 | return PhysReg; | |
1922 | return tryInstructionSplit(VirtReg, Order, NewVRegs); | |
1923 | } | |
1924 | ||
1925 | NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); | |
1926 | ||
1927 | SA->analyze(&VirtReg); | |
1928 | ||
1929 | // FIXME: SplitAnalysis may repair broken live ranges coming from the | |
1930 | // coalescer. That may cause the range to become allocatable which means that | |
1931 | // tryRegionSplit won't be making progress. This check should be replaced with | |
1932 | // an assertion when the coalescer is fixed. | |
1933 | if (SA->didRepairRange()) { | |
1934 | // VirtReg has changed, so all cached queries are invalid. | |
1935 | Matrix->invalidateVirtRegs(); | |
1936 | if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) | |
1937 | return PhysReg; | |
1938 | } | |
1939 | ||
1940 | // First try to split around a region spanning multiple blocks. RS_Split2 | |
1941 | // ranges already made dubious progress with region splitting, so they go | |
1942 | // straight to single block splitting. | |
1943 | if (getStage(VirtReg) < RS_Split2) { | |
1944 | unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); | |
1945 | if (PhysReg || !NewVRegs.empty()) | |
1946 | return PhysReg; | |
1947 | } | |
1948 | ||
1949 | // Then isolate blocks. | |
1950 | return tryBlockSplit(VirtReg, Order, NewVRegs); | |
1951 | } | |
1952 | ||
1a4d82fc JJ |
1953 | //===----------------------------------------------------------------------===// |
1954 | // Last Chance Recoloring | |
1955 | //===----------------------------------------------------------------------===// | |
1956 | ||
1957 | /// mayRecolorAllInterferences - Check if the virtual registers that | |
1958 | /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be | |
1959 | /// recolored to free \p PhysReg. | |
1960 | /// When true is returned, \p RecoloringCandidates has been augmented with all | |
1961 | /// the live intervals that need to be recolored in order to free \p PhysReg | |
1962 | /// for \p VirtReg. | |
1963 | /// \p FixedRegisters contains all the virtual registers that cannot be | |
1964 | /// recolored. | |
1965 | bool | |
1966 | RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, | |
1967 | SmallLISet &RecoloringCandidates, | |
1968 | const SmallVirtRegSet &FixedRegisters) { | |
1969 | const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); | |
1970 | ||
1971 | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { | |
1972 | LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); | |
1973 | // If there are LastChanceRecoloringMaxInterference or more interferences, | |
1974 | // chances are one would not be recolorable. | |
1975 | if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= | |
1976 | LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { | |
1977 | DEBUG(dbgs() << "Early abort: too many interferences.\n"); | |
1978 | CutOffInfo |= CO_Interf; | |
1979 | return false; | |
1980 | } | |
1981 | for (unsigned i = Q.interferingVRegs().size(); i; --i) { | |
1982 | LiveInterval *Intf = Q.interferingVRegs()[i - 1]; | |
1983 | // If Intf is done and sits on the same register class as VirtReg, | |
1984 | // it would not be recolorable as it is in the same state as VirtReg. | |
1985 | if ((getStage(*Intf) == RS_Done && | |
1986 | MRI->getRegClass(Intf->reg) == CurRC) || | |
1987 | FixedRegisters.count(Intf->reg)) { | |
1988 | DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); | |
1989 | return false; | |
1990 | } | |
1991 | RecoloringCandidates.insert(Intf); | |
1992 | } | |
1993 | } | |
1994 | return true; | |
1995 | } | |
1996 | ||
1997 | /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring | |
1998 | /// its interferences. | |
1999 | /// Last chance recoloring chooses a color for \p VirtReg and recolors every | |
2000 | /// virtual register that was using it. The recoloring process may recursively | |
2001 | /// use the last chance recoloring. Therefore, when a virtual register has been | |
2002 | /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot | |
2003 | /// be last-chance-recolored again during this recoloring "session". | |
2004 | /// E.g., | |
2005 | /// Let | |
2006 | /// vA can use {R1, R2 } | |
2007 | /// vB can use { R2, R3} | |
2008 | /// vC can use {R1 } | |
2009 | /// Where vA, vB, and vC cannot be split anymore (they are reloads for | |
2010 | /// instance) and they all interfere. | |
2011 | /// | |
2012 | /// vA is assigned R1 | |
2013 | /// vB is assigned R2 | |
2014 | /// vC tries to evict vA but vA is already done. | |
2015 | /// Regular register allocation fails. | |
2016 | /// | |
2017 | /// Last chance recoloring kicks in: | |
2018 | /// vC does as if vA was evicted => vC uses R1. | |
2019 | /// vC is marked as fixed. | |
2020 | /// vA needs to find a color. | |
2021 | /// None are available. | |
2022 | /// vA cannot evict vC: vC is a fixed virtual register now. | |
2023 | /// vA does as if vB was evicted => vA uses R2. | |
2024 | /// vB needs to find a color. | |
2025 | /// R3 is available. | |
2026 | /// Recoloring => vC = R1, vA = R2, vB = R3 | |
2027 | /// | |
2028 | /// \p Order defines the preferred allocation order for \p VirtReg. | |
2029 | /// \p NewRegs will contain any new virtual register that have been created | |
2030 | /// (split, spill) during the process and that must be assigned. | |
2031 | /// \p FixedRegisters contains all the virtual registers that cannot be | |
2032 | /// recolored. | |
2033 | /// \p Depth gives the current depth of the last chance recoloring. | |
2034 | /// \return a physical register that can be used for VirtReg or ~0u if none | |
2035 | /// exists. | |
2036 | unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, | |
2037 | AllocationOrder &Order, | |
2038 | SmallVectorImpl<unsigned> &NewVRegs, | |
2039 | SmallVirtRegSet &FixedRegisters, | |
2040 | unsigned Depth) { | |
2041 | DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); | |
2042 | // Ranges must be Done. | |
2043 | assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && | |
2044 | "Last chance recoloring should really be last chance"); | |
2045 | // Set the max depth to LastChanceRecoloringMaxDepth. | |
2046 | // We may want to reconsider that if we end up with too large a search space | |
2047 | // for targets with hundreds of registers. | |
2048 | // Indeed, in that case we may want to cut the search space earlier. | |
2049 | if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { | |
2050 | DEBUG(dbgs() << "Abort because max depth has been reached.\n"); | |
2051 | CutOffInfo |= CO_Depth; | |
2052 | return ~0u; | |
2053 | } | |
2054 | ||
2055 | // Set of Live intervals that will need to be recolored. | |
2056 | SmallLISet RecoloringCandidates; | |
2057 | // Record the original mapping from virtual register to physical register in case | |
2058 | // the recoloring fails. | |
2059 | DenseMap<unsigned, unsigned> VirtRegToPhysReg; | |
2060 | // Mark VirtReg as fixed, i.e., it will not be recolored past this point in | |
2061 | // this recoloring "session". | |
2062 | FixedRegisters.insert(VirtReg.reg); | |
2063 | ||
2064 | Order.rewind(); | |
2065 | while (unsigned PhysReg = Order.next()) { | |
2066 | DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " | |
2067 | << PrintReg(PhysReg, TRI) << '\n'); | |
2068 | RecoloringCandidates.clear(); | |
2069 | VirtRegToPhysReg.clear(); | |
2070 | ||
2071 | // It is only possible to recolor virtual register interference. | |
2072 | if (Matrix->checkInterference(VirtReg, PhysReg) > | |
2073 | LiveRegMatrix::IK_VirtReg) { | |
2074 | DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); | |
2075 | ||
2076 | continue; | |
2077 | } | |
2078 | ||
2079 | // Give up early on this PhysReg if it is obvious we cannot recolor all | |
2080 | // the interferences. | |
2081 | if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, | |
2082 | FixedRegisters)) { | |
2083 | DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); | |
2084 | continue; | |
2085 | } | |
2086 | ||
2087 | // RecoloringCandidates contains all the virtual registers that interfere | |
2088 | // with VirtReg on PhysReg (or one of its aliases). | |
2089 | // Enqueue them for recoloring and perform the actual recoloring. | |
2090 | PQueue RecoloringQueue; | |
2091 | for (SmallLISet::iterator It = RecoloringCandidates.begin(), | |
2092 | EndIt = RecoloringCandidates.end(); | |
2093 | It != EndIt; ++It) { | |
2094 | unsigned ItVirtReg = (*It)->reg; | |
2095 | enqueue(RecoloringQueue, *It); | |
2096 | assert(VRM->hasPhys(ItVirtReg) && | |
2097 | "Interferences are supposed to be with allocated vairables"); | |
2098 | ||
2099 | // Record the current allocation. | |
2100 | VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); | |
2101 | // Unassign it so its physical register becomes available for recoloring. | |
2102 | Matrix->unassign(**It); | |
2103 | } | |
2104 | ||
2105 | // Do as if VirtReg was assigned to PhysReg so that the underlying | |
2106 | // recoloring has the right information about the interferences and | |
2107 | // available colors. | |
2108 | Matrix->assign(VirtReg, PhysReg); | |
2109 | ||
2110 | // Save the current recoloring state. | |
2111 | // If we cannot recolor all the interferences, we will have to start again | |
2112 | // at this point for the next physical register. | |
2113 | SmallVirtRegSet SaveFixedRegisters(FixedRegisters); | |
2114 | if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, | |
2115 | Depth)) { | |
2116 | // Do not mess with the global assignment process. | |
2117 | // I.e., VirtReg must be unassigned. | |
2118 | Matrix->unassign(VirtReg); | |
2119 | return PhysReg; | |
2120 | } | |
2121 | ||
2122 | DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " | |
2123 | << PrintReg(PhysReg, TRI) << '\n'); | |
2124 | ||
2125 | // The recoloring attempt failed; undo the changes. | |
2126 | FixedRegisters = SaveFixedRegisters; | |
2127 | Matrix->unassign(VirtReg); | |
2128 | ||
2129 | for (SmallLISet::iterator It = RecoloringCandidates.begin(), | |
2130 | EndIt = RecoloringCandidates.end(); | |
2131 | It != EndIt; ++It) { | |
2132 | unsigned ItVirtReg = (*It)->reg; | |
2133 | if (VRM->hasPhys(ItVirtReg)) | |
2134 | Matrix->unassign(**It); | |
2135 | Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); | |
2136 | } | |
2137 | } | |
2138 | ||
2139 | // Last chance recoloring did not work either; give up. | |
2140 | return ~0u; | |
2141 | } | |
2142 | ||
2143 | /// tryRecoloringCandidates - Try to assign a new color to every register | |
2144 | /// in \p RecoloringQueue. | |
2145 | /// \p NewRegs will contain any new virtual register created during the | |
2146 | /// recoloring process. | |
2147 | /// \p FixedRegisters[in/out] contains all the registers that have been | |
2148 | /// recolored. | |
2149 | /// \return true if all virtual registers in RecoloringQueue were successfully | |
2150 | /// recolored, false otherwise. | |
2151 | bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, | |
2152 | SmallVectorImpl<unsigned> &NewVRegs, | |
2153 | SmallVirtRegSet &FixedRegisters, | |
2154 | unsigned Depth) { | |
2155 | while (!RecoloringQueue.empty()) { | |
2156 | LiveInterval *LI = dequeue(RecoloringQueue); | |
2157 | DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); | |
2158 | unsigned PhysReg; | |
2159 | PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); | |
2160 | if (PhysReg == ~0u || !PhysReg) | |
2161 | return false; | |
2162 | DEBUG(dbgs() << "Recoloring of " << *LI | |
2163 | << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); | |
2164 | Matrix->assign(*LI, PhysReg); | |
2165 | FixedRegisters.insert(LI->reg); | |
2166 | } | |
2167 | return true; | |
2168 | } | |
223e47cc LB |
2169 | |
2170 | //===----------------------------------------------------------------------===// | |
2171 | // Main Entry Point | |
2172 | //===----------------------------------------------------------------------===// | |
2173 | ||
2174 | unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, | |
1a4d82fc JJ |
2175 | SmallVectorImpl<unsigned> &NewVRegs) { |
2176 | CutOffInfo = CO_None; | |
2177 | LLVMContext &Ctx = MF->getFunction()->getContext(); | |
2178 | SmallVirtRegSet FixedRegisters; | |
2179 | unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); | |
2180 | if (Reg == ~0U && (CutOffInfo != CO_None)) { | |
2181 | uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); | |
2182 | if (CutOffEncountered == CO_Depth) | |
2183 | Ctx.emitError("register allocation failed: maximum depth for recoloring " | |
2184 | "reached. Use -fexhaustive-register-search to skip " | |
2185 | "cutoffs"); | |
2186 | else if (CutOffEncountered == CO_Interf) | |
2187 | Ctx.emitError("register allocation failed: maximum interference for " | |
2188 | "recoloring reached. Use -fexhaustive-register-search " | |
2189 | "to skip cutoffs"); | |
2190 | else if (CutOffEncountered == (CO_Depth | CO_Interf)) | |
2191 | Ctx.emitError("register allocation failed: maximum interference and " | |
2192 | "depth for recoloring reached. Use " | |
2193 | "-fexhaustive-register-search to skip cutoffs"); | |
2194 | } | |
2195 | return Reg; | |
2196 | } | |
2197 | ||
2198 | /// Using a CSR for the first time has a cost because it causes push|pop | |
2199 | /// to be added to prologue|epilogue. Splitting a cold section of the live | |
2200 | /// range can have lower cost than using the CSR for the first time; | |
2201 | /// spilling a live range in the cold path can have lower cost than using | |
2202 | /// the CSR for the first time. Returns the physical register if we decide | |
2203 | /// to use the CSR; otherwise returns 0. | |
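///
/// In short: a spillable range already at the spill stage takes the CSR only
/// if its spill cost is at least CSRCost; a range not yet split takes the CSR
/// only if no region split cheaper than CSRCost exists; otherwise the CSR is
/// used as-is.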
2204 | unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, | |
2205 | AllocationOrder &Order, | |
2206 | unsigned PhysReg, | |
2207 | unsigned &CostPerUseLimit, | |
2208 | SmallVectorImpl<unsigned> &NewVRegs) { | |
2209 | if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { | |
2210 | // We choose spill over using the CSR for the first time if the spill cost | |
2211 | // is lower than CSRCost. | |
2212 | SA->analyze(&VirtReg); | |
2213 | if (calcSpillCost() >= CSRCost) | |
2214 | return PhysReg; | |
2215 | ||
2216 | // We are going to spill; set CostPerUseLimit to 1 to make sure that | |
2217 | // we will not use a callee-saved register in tryEvict. | |
2218 | CostPerUseLimit = 1; | |
2219 | return 0; | |
2220 | } | |
2221 | if (getStage(VirtReg) < RS_Split) { | |
2222 | // We choose pre-splitting over using the CSR for the first time if | |
2223 | // the cost of splitting is lower than CSRCost. | |
2224 | SA->analyze(&VirtReg); | |
2225 | unsigned NumCands = 0; | |
2226 | BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. | |
2227 | unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, | |
2228 | NumCands, true /*IgnoreCSR*/); | |
2229 | if (BestCand == NoCand) | |
2230 | // Use the CSR if we can't find a region split below CSRCost. | |
2231 | return PhysReg; | |
2232 | ||
2233 | // Perform the actual pre-splitting. | |
2234 | doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); | |
2235 | return 0; | |
2236 | } | |
2237 | return PhysReg; | |
2238 | } | |
2239 | ||
85aaf69f SL |
2240 | void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { |
2241 | // Do not keep invalid information around. | |
2242 | SetOfBrokenHints.remove(&LI); | |
2243 | } | |
2244 | ||
1a4d82fc JJ |
2245 | void RAGreedy::initializeCSRCost() { |
2246 | // We use the larger of the command-line option and the value reported | |
2247 | // by TRI. | |
2248 | CSRCost = BlockFrequency( | |
2249 | std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); | |
2250 | if (!CSRCost.getFrequency()) | |
2251 | return; | |
2252 | ||
2253 | // Raw cost is relative to Entry == 2^14; scale it appropriately. | |
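// For example (illustrative): a function whose actual entry frequency is 2^13
// gets the raw cost halved, while one with an entry frequency of 2^15 gets it
// doubled.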
2254 | uint64_t ActualEntry = MBFI->getEntryFreq(); | |
2255 | if (!ActualEntry) { | |
2256 | CSRCost = 0; | |
2257 | return; | |
2258 | } | |
2259 | uint64_t FixedEntry = 1 << 14; | |
2260 | if (ActualEntry < FixedEntry) | |
2261 | CSRCost *= BranchProbability(ActualEntry, FixedEntry); | |
2262 | else if (ActualEntry <= UINT32_MAX) | |
2263 | // Invert the fraction and divide. | |
2264 | CSRCost /= BranchProbability(FixedEntry, ActualEntry); | |
2265 | else | |
2266 | // Can't use BranchProbability in general, since it takes 32-bit numbers. | |
2267 | CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); | |
2268 | } | |
2269 | ||
85aaf69f SL |
2270 | /// \brief Collect the hint info for \p Reg. |
2271 | /// The results are stored into \p Out. | |
2272 | /// \p Out is not cleared before being populated. | |
2273 | void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { | |
2274 | for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { | |
2275 | if (!Instr.isFullCopy()) | |
2276 | continue; | |
2277 | // Look for the other end of the copy. | |
2278 | unsigned OtherReg = Instr.getOperand(0).getReg(); | |
2279 | if (OtherReg == Reg) { | |
2280 | OtherReg = Instr.getOperand(1).getReg(); | |
2281 | if (OtherReg == Reg) | |
2282 | continue; | |
2283 | } | |
2284 | // Get the current assignment. | |
2285 | unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) | |
2286 | ? OtherReg | |
2287 | : VRM->getPhys(OtherReg); | |
2288 | // Push the collected information. | |
2289 | Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, | |
2290 | OtherPhysReg)); | |
2291 | } | |
2292 | } | |
2293 | ||
2294 | /// \brief Using the given \p List, compute the cost of the broken hints if | |
2295 | /// \p PhysReg was used. | |
2296 | /// \return The cost of \p List for \p PhysReg. | |
2297 | BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, | |
2298 | unsigned PhysReg) { | |
2299 | BlockFrequency Cost = 0; | |
2300 | for (const HintInfo &Info : List) { | |
2301 | if (Info.PhysReg != PhysReg) | |
2302 | Cost += Info.Freq; | |
2303 | } | |
2304 | return Cost; | |
2305 | } | |
2306 | ||
2307 | /// \brief Using the register assigned to \p VirtReg, try to recolor | |
2308 | /// all the live ranges that are copy-related with \p VirtReg. | |
2309 | /// The recoloring is then propagated to all the live-ranges that have | |
2310 | /// been recolored and so on, until no more copies can be coalesced or | |
2311 | /// it is not profitable. | |
2312 | /// For a given live range, profitability is determined by the sum of the | |
2313 | /// frequencies of the non-identity copies it would introduce with the old | |
2314 | /// and new register. | |
2315 | void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { | |
2316 | // We have a broken hint; check if it is possible to fix it by | |
2317 | // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted | |
2318 | // some register and PhysReg may be available for the other live-ranges. | |
2319 | SmallSet<unsigned, 4> Visited; | |
2320 | SmallVector<unsigned, 2> RecoloringCandidates; | |
2321 | HintsInfo Info; | |
2322 | unsigned Reg = VirtReg.reg; | |
2323 | unsigned PhysReg = VRM->getPhys(Reg); | |
2324 | // Start the recoloring algorithm from the input live-interval, then | |
2325 | // it will propagate to the ones that are copy-related with it. | |
2326 | Visited.insert(Reg); | |
2327 | RecoloringCandidates.push_back(Reg); | |
2328 | ||
2329 | DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' | |
2330 | << PrintReg(PhysReg, TRI) << ")\n"); | |
2331 | ||
2332 | do { | |
2333 | Reg = RecoloringCandidates.pop_back_val(); | |
2334 | ||
2335 | // We cannot recolor physical registers. | |
2336 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) | |
2337 | continue; | |
2338 | ||
2339 | assert(VRM->hasPhys(Reg) && "We have an unallocated variable!"); | |
2340 | ||
2341 | // Get the live interval mapped with this virtual register to be able | |
2342 | // to check for the interference with the new color. | |
2343 | LiveInterval &LI = LIS->getInterval(Reg); | |
2344 | unsigned CurrPhys = VRM->getPhys(Reg); | |
2345 | // Check that the new color matches the register class constraints and | |
2346 | // that it is free for this live range. | |
2347 | if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || | |
2348 | Matrix->checkInterference(LI, PhysReg))) | |
2349 | continue; | |
2350 | ||
2351 | DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) | |
2352 | << ") is recolorable.\n"); | |
2353 | ||
2354 | // Gather the hint info. | |
2355 | Info.clear(); | |
2356 | collectHintInfo(Reg, Info); | |
2357 | // Check if recoloring the live-range will increase the cost of the | |
2358 | // non-identity copies. | |
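// Illustrative example with hypothetical registers: if the copy-related
// ranges are assigned {R1 at frequency 8, R2 at frequency 2} and CurrPhys is
// R2, the broken-hint copies currently cost 8, while recoloring to
// PhysReg = R1 would leave only 2, so the recoloring is profitable.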
2359 | if (CurrPhys != PhysReg) { | |
2360 | DEBUG(dbgs() << "Checking profitability:\n"); | |
2361 | BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); | |
2362 | BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); | |
2363 | DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() | |
2364 | << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); | |
2365 | if (OldCopiesCost < NewCopiesCost) { | |
2366 | DEBUG(dbgs() << "=> Not profitable.\n"); | |
2367 | continue; | |
2368 | } | |
2369 | // At this point, the cost is either cheaper or equal. If it is | |
2370 | // equal, we consider this is profitable because it may expose | |
2371 | // more recoloring opportunities. | |
2372 | DEBUG(dbgs() << "=> Profitable.\n"); | |
2373 | // Recolor the live-range. | |
2374 | Matrix->unassign(LI); | |
2375 | Matrix->assign(LI, PhysReg); | |
2376 | } | |
2377 | // Push all copy-related live-ranges to keep reconciling the broken | |
2378 | // hints. | |
2379 | for (const HintInfo &HI : Info) { | |
2380 | if (Visited.insert(HI.Reg).second) | |
2381 | RecoloringCandidates.push_back(HI.Reg); | |
2382 | } | |
2383 | } while (!RecoloringCandidates.empty()); | |
2384 | } | |
2385 | ||
2386 | /// \brief Try to recolor broken hints. | |
2387 | /// Broken hints may be repaired by recoloring when an evicted variable | |
2388 | /// freed up a register for a larger live-range. | |
2389 | /// Consider the following example: | |
2390 | /// BB1: | |
2391 | /// a = | |
2392 | /// b = | |
2393 | /// BB2: | |
2394 | /// ... | |
2395 | /// = b | |
2396 | /// = a | |
2397 | /// Let us assume b gets split: | |
2398 | /// BB1: | |
2399 | /// a = | |
2400 | /// b = | |
2401 | /// BB2: | |
2402 | /// c = b | |
2403 | /// ... | |
2404 | /// d = c | |
2405 | /// = d | |
2406 | /// = a | |
2407 | /// Because of how the allocation works, b, c, and d may be assigned different | |
2408 | /// colors. Now, if a gets evicted later: | |
2409 | /// BB1: | |
2410 | /// a = | |
2411 | /// st a, SpillSlot | |
2412 | /// b = | |
2413 | /// BB2: | |
2414 | /// c = b | |
2415 | /// ... | |
2416 | /// d = c | |
2417 | /// = d | |
2418 | /// e = ld SpillSlot | |
2419 | /// = e | |
2420 | /// It is likely that we can assign the same register to b, c, and d, | |
2421 | /// getting rid of 2 copies. | |
2422 | void RAGreedy::tryHintsRecoloring() { | |
2423 | for (LiveInterval *LI : SetOfBrokenHints) { | |
2424 | assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && | |
2425 | "Recoloring is possible only for virtual registers"); | |
2426 | // Some dead defs may be around (e.g., because of debug uses). | |
2427 | // Ignore those. | |
2428 | if (!VRM->hasPhys(LI->reg)) | |
2429 | continue; | |
2430 | tryHintRecoloring(*LI); | |
2431 | } | |
2432 | } | |
2433 | ||
1a4d82fc JJ |
2434 | unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, |
2435 | SmallVectorImpl<unsigned> &NewVRegs, | |
2436 | SmallVirtRegSet &FixedRegisters, | |
2437 | unsigned Depth) { | |
2438 | unsigned CostPerUseLimit = ~0u; | |
223e47cc LB |
2439 | // First try assigning a free register. |
2440 | AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); | |
1a4d82fc JJ |
2441 | if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { |
2442 | // We check other options if we are using a CSR for the first time. | |
2443 | bool CSRFirstUse = false; | |
2444 | if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) | |
2445 | if (!MRI->isPhysRegUsed(CSR)) | |
2446 | CSRFirstUse = true; | |
2447 | ||
2448 | // When NewVRegs is not empty, we may have made decisions such as evicting | |
2449 | // a virtual register; go with the earlier decisions and use the physical | |
2450 | // register. | |
2451 | if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { | |
2452 | unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, | |
2453 | CostPerUseLimit, NewVRegs); | |
2454 | if (CSRReg || !NewVRegs.empty()) | |
2455 | // Return now if we decide to use a CSR or create new vregs due to | |
2456 | // pre-splitting. | |
2457 | return CSRReg; | |
2458 | } else | |
2459 | return PhysReg; | |
2460 | } | |
223e47cc LB |
2461 | |
2462 | LiveRangeStage Stage = getStage(VirtReg); | |
2463 | DEBUG(dbgs() << StageName[Stage] | |
2464 | << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); | |
2465 | ||
2466 | // Try to evict a less worthy live range, but only for ranges from the primary | |
2467 | // queue. The RS_Split ranges already failed to do this, and they should not | |
2468 | // get a second chance until they have been split. | |
2469 | if (Stage != RS_Split) | |
85aaf69f SL |
2470 | if (unsigned PhysReg = |
2471 | tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { | |
2472 | unsigned Hint = MRI->getSimpleHint(VirtReg.reg); | |
2473 | // If VirtReg has a hint and that hint is broken, record this | |
2474 | // virtual register as a recoloring candidate for broken hint. | |
2475 | // Indeed, since we evicted a variable in its neighborhood it is | |
2476 | // likely we can at least partially recolor some of the | |
2477 | // copy-related live-ranges. | |
2478 | if (Hint && Hint != PhysReg) | |
2479 | SetOfBrokenHints.insert(&VirtReg); | |
223e47cc | 2480 | return PhysReg; |
85aaf69f | 2481 | } |
223e47cc LB |
2482 | |
2483 | assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); | |
2484 | ||
2485 | // The first time we see a live range, don't try to split or spill. | |
2486 | // Wait until the second time, when all smaller ranges have been allocated. | |
2487 | // This gives a better picture of the interference to split around. | |
2488 | if (Stage < RS_Split) { | |
2489 | setStage(VirtReg, RS_Split); | |
2490 | DEBUG(dbgs() << "wait for second round\n"); | |
1a4d82fc | 2491 | NewVRegs.push_back(VirtReg.reg); |
223e47cc LB |
2492 | return 0; |
2493 | } | |
2494 | ||
2495 | // If we couldn't allocate a register from spilling, there is probably some | |
2496 | // invalid inline assembly. The base class will report it. | |
2497 | if (Stage >= RS_Done || !VirtReg.isSpillable()) | |
1a4d82fc JJ |
2498 | return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, |
2499 | Depth); | |
223e47cc LB |
2500 | |
2501 | // Try splitting VirtReg or interferences. | |
2502 | unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); | |
2503 | if (PhysReg || !NewVRegs.empty()) | |
2504 | return PhysReg; | |
2505 | ||
2506 | // Finally spill VirtReg itself. | |
2507 | NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); | |
2508 | LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); | |
2509 | spiller().spill(LRE); | |
2510 | setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); | |
2511 | ||
2512 | if (VerifyEnabled) | |
2513 | MF->verify(this, "After spilling"); | |
2514 | ||
2515 | // The live virtual register requesting allocation was spilled, so tell | |
2516 | // the caller not to allocate anything during this round. | |
2517 | return 0; | |
2518 | } | |
2519 | ||
2520 | bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { | |
2521 | DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" | |
2522 | << "********** Function: " << mf.getName() << '\n'); | |
2523 | ||
2524 | MF = &mf; | |
85aaf69f SL |
2525 | TRI = MF->getSubtarget().getRegisterInfo(); |
2526 | TII = MF->getSubtarget().getInstrInfo(); | |
1a4d82fc JJ |
2527 | RCI.runOnMachineFunction(mf); |
2528 | ||
2529 | EnableLocalReassign = EnableLocalReassignment || | |
85aaf69f SL |
2530 | MF->getSubtarget().enableRALocalReassignment( |
2531 | MF->getTarget().getOptLevel()); | |
1a4d82fc | 2532 | |
223e47cc LB |
2533 | if (VerifyEnabled) |
2534 | MF->verify(this, "Before greedy register allocator"); | |
2535 | ||
2536 | RegAllocBase::init(getAnalysis<VirtRegMap>(), | |
2537 | getAnalysis<LiveIntervals>(), | |
2538 | getAnalysis<LiveRegMatrix>()); | |
2539 | Indexes = &getAnalysis<SlotIndexes>(); | |
1a4d82fc | 2540 | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); |
223e47cc LB |
2541 | DomTree = &getAnalysis<MachineDominatorTree>(); |
2542 | SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); | |
2543 | Loops = &getAnalysis<MachineLoopInfo>(); | |
2544 | Bundles = &getAnalysis<EdgeBundles>(); | |
2545 | SpillPlacer = &getAnalysis<SpillPlacement>(); | |
2546 | DebugVars = &getAnalysis<LiveDebugVariables>(); | |
2547 | ||
1a4d82fc JJ |
2548 | initializeCSRCost(); |
2549 | ||
2550 | calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); | |
2551 | ||
2552 | DEBUG(LIS->dump()); | |
2553 | ||
223e47cc | 2554 | SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); |
1a4d82fc | 2555 | SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); |
223e47cc LB |
2556 | ExtraRegInfo.clear(); |
2557 | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | |
2558 | NextCascade = 1; | |
2559 | IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); | |
2560 | GlobalCand.resize(32); // This will grow as needed. | |
85aaf69f | 2561 | SetOfBrokenHints.clear(); |
223e47cc LB |
2562 | |
2563 | allocatePhysRegs(); | |
85aaf69f | 2564 | tryHintsRecoloring(); |
223e47cc LB |
2565 | releaseMemory(); |
2566 | return true; | |
2567 | } |