1 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // The machine combiner pass uses machine trace metrics to ensure the combined
11 // instructions does not lengthen the critical path or the resource depth.
12 //===----------------------------------------------------------------------===//
13 #define DEBUG_TYPE "machine-combiner"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/CodeGen/MachineDominators.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/MachineTraceMetrics.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetInstrInfo.h"
30 #include "llvm/Target/TargetRegisterInfo.h"
31 #include "llvm/Target/TargetSubtargetInfo.h"
35 STATISTIC(NumInstCombined
, "Number of machineinst combined");
38 class MachineCombiner
: public MachineFunctionPass
{
39 const TargetInstrInfo
*TII
;
40 const TargetRegisterInfo
*TRI
;
41 MCSchedModel SchedModel
;
42 MachineRegisterInfo
*MRI
;
43 MachineTraceMetrics
*Traces
;
44 MachineTraceMetrics::Ensemble
*MinInstr
;
46 TargetSchedModel TSchedModel
;
48 /// OptSize - True if optimizing for code size.
53 MachineCombiner() : MachineFunctionPass(ID
) {
54 initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
56 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
57 bool runOnMachineFunction(MachineFunction
&MF
) override
;
58 const char *getPassName() const override
{ return "Machine InstCombiner"; }
61 bool doSubstitute(unsigned NewSize
, unsigned OldSize
);
62 bool combineInstructions(MachineBasicBlock
*);
63 MachineInstr
*getOperandDef(const MachineOperand
&MO
);
64 unsigned getDepth(SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
65 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
,
66 MachineTraceMetrics::Trace BlockTrace
);
67 unsigned getLatency(MachineInstr
*Root
, MachineInstr
*NewRoot
,
68 MachineTraceMetrics::Trace BlockTrace
);
70 preservesCriticalPathLen(MachineBasicBlock
*MBB
, MachineInstr
*Root
,
71 MachineTraceMetrics::Trace BlockTrace
,
72 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
73 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
);
74 bool preservesResourceLen(MachineBasicBlock
*MBB
,
75 MachineTraceMetrics::Trace BlockTrace
,
76 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
77 SmallVectorImpl
<MachineInstr
*> &DelInstrs
);
78 void instr2instrSC(SmallVectorImpl
<MachineInstr
*> &Instrs
,
79 SmallVectorImpl
<const MCSchedClassDesc
*> &InstrsSC
);
83 char MachineCombiner::ID
= 0;
84 char &llvm::MachineCombinerID
= MachineCombiner::ID
;
86 INITIALIZE_PASS_BEGIN(MachineCombiner
, "machine-combiner",
87 "Machine InstCombiner", false, false)
88 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics
)
89 INITIALIZE_PASS_END(MachineCombiner
, "machine-combiner", "Machine InstCombiner",
92 void MachineCombiner::getAnalysisUsage(AnalysisUsage
&AU
) const {
94 AU
.addPreserved
<MachineDominatorTree
>();
95 AU
.addPreserved
<MachineLoopInfo
>();
96 AU
.addRequired
<MachineTraceMetrics
>();
97 AU
.addPreserved
<MachineTraceMetrics
>();
98 MachineFunctionPass::getAnalysisUsage(AU
);
101 MachineInstr
*MachineCombiner::getOperandDef(const MachineOperand
&MO
) {
102 MachineInstr
*DefInstr
= nullptr;
103 // We need a virtual register definition.
104 if (MO
.isReg() && TargetRegisterInfo::isVirtualRegister(MO
.getReg()))
105 DefInstr
= MRI
->getUniqueVRegDef(MO
.getReg());
106 // PHI's have no depth etc.
107 if (DefInstr
&& DefInstr
->isPHI())
112 /// getDepth - Computes depth of instructions in vector \InsInstr.
114 /// \param InsInstrs is a vector of machine instructions
115 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
116 /// of defining machine instruction in \p InsInstrs
117 /// \param BlockTrace is a trace of machine instructions
119 /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
121 MachineCombiner::getDepth(SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
122 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
,
123 MachineTraceMetrics::Trace BlockTrace
) {
125 SmallVector
<unsigned, 16> InstrDepth
;
126 assert(TSchedModel
.hasInstrSchedModel() && "Missing machine model\n");
128 // Foreach instruction in in the new sequence compute the depth based on the
129 // operands. Use the trace information when possible. For new operands which
130 // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
131 for (auto *InstrPtr
: InsInstrs
) { // for each Use
133 DEBUG(dbgs() << "NEW INSTR "; InstrPtr
->dump(); dbgs() << "\n";);
134 for (unsigned i
= 0, e
= InstrPtr
->getNumOperands(); i
!= e
; ++i
) {
135 const MachineOperand
&MO
= InstrPtr
->getOperand(i
);
136 // Check for virtual register operand.
137 if (!(MO
.isReg() && TargetRegisterInfo::isVirtualRegister(MO
.getReg())))
141 unsigned DepthOp
= 0;
142 unsigned LatencyOp
= 0;
143 DenseMap
<unsigned, unsigned>::iterator II
=
144 InstrIdxForVirtReg
.find(MO
.getReg());
145 if (II
!= InstrIdxForVirtReg
.end()) {
146 // Operand is new virtual register not in trace
147 assert(II
->second
< InstrDepth
.size() && "Bad Index");
148 MachineInstr
*DefInstr
= InsInstrs
[II
->second
];
150 "There must be a definition for a new virtual register");
151 DepthOp
= InstrDepth
[II
->second
];
152 LatencyOp
= TSchedModel
.computeOperandLatency(
153 DefInstr
, DefInstr
->findRegisterDefOperandIdx(MO
.getReg()),
154 InstrPtr
, InstrPtr
->findRegisterUseOperandIdx(MO
.getReg()));
156 MachineInstr
*DefInstr
= getOperandDef(MO
);
158 DepthOp
= BlockTrace
.getInstrCycles(DefInstr
).Depth
;
159 LatencyOp
= TSchedModel
.computeOperandLatency(
160 DefInstr
, DefInstr
->findRegisterDefOperandIdx(MO
.getReg()),
161 InstrPtr
, InstrPtr
->findRegisterUseOperandIdx(MO
.getReg()));
164 IDepth
= std::max(IDepth
, DepthOp
+ LatencyOp
);
166 InstrDepth
.push_back(IDepth
);
168 unsigned NewRootIdx
= InsInstrs
.size() - 1;
169 return InstrDepth
[NewRootIdx
];
172 /// getLatency - Computes instruction latency as max of latency of defined
175 /// \param Root is a machine instruction that could be replaced by NewRoot.
176 /// It is used to compute a more accurate latency information for NewRoot in
177 /// case there is a dependent instruction in the same trace (\p BlockTrace)
178 /// \param NewRoot is the instruction for which the latency is computed
179 /// \param BlockTrace is a trace of machine instructions
181 /// \returns Latency of \p NewRoot
182 unsigned MachineCombiner::getLatency(MachineInstr
*Root
, MachineInstr
*NewRoot
,
183 MachineTraceMetrics::Trace BlockTrace
) {
185 assert(TSchedModel
.hasInstrSchedModel() && "Missing machine model\n");
187 // Check each definition in NewRoot and compute the latency
188 unsigned NewRootLatency
= 0;
190 for (unsigned i
= 0, e
= NewRoot
->getNumOperands(); i
!= e
; ++i
) {
191 const MachineOperand
&MO
= NewRoot
->getOperand(i
);
192 // Check for virtual register operand.
193 if (!(MO
.isReg() && TargetRegisterInfo::isVirtualRegister(MO
.getReg())))
197 // Get the first instruction that uses MO
198 MachineRegisterInfo::reg_iterator RI
= MRI
->reg_begin(MO
.getReg());
200 MachineInstr
*UseMO
= RI
->getParent();
201 unsigned LatencyOp
= 0;
202 if (UseMO
&& BlockTrace
.isDepInTrace(Root
, UseMO
)) {
203 LatencyOp
= TSchedModel
.computeOperandLatency(
204 NewRoot
, NewRoot
->findRegisterDefOperandIdx(MO
.getReg()), UseMO
,
205 UseMO
->findRegisterUseOperandIdx(MO
.getReg()));
207 LatencyOp
= TSchedModel
.computeInstrLatency(NewRoot
->getOpcode());
209 NewRootLatency
= std::max(NewRootLatency
, LatencyOp
);
211 return NewRootLatency
;
214 /// preservesCriticalPathlen - True when the new instruction sequence does not
215 /// lengthen the critical path. The DAGCombine code sequence ends in MI
216 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
217 /// necessary condition for the new sequence to replace the old sequence is that
218 /// is cannot lengthen the critical path. This is decided by the formula
219 /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
220 /// The slack is the number of cycles Root can be delayed before the critical
221 /// patch becomes longer.
222 bool MachineCombiner::preservesCriticalPathLen(
223 MachineBasicBlock
*MBB
, MachineInstr
*Root
,
224 MachineTraceMetrics::Trace BlockTrace
,
225 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
226 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
) {
228 assert(TSchedModel
.hasInstrSchedModel() && "Missing machine model\n");
229 // NewRoot is the last instruction in the \p InsInstrs vector
230 // Get depth and latency of NewRoot
231 unsigned NewRootIdx
= InsInstrs
.size() - 1;
232 MachineInstr
*NewRoot
= InsInstrs
[NewRootIdx
];
233 unsigned NewRootDepth
= getDepth(InsInstrs
, InstrIdxForVirtReg
, BlockTrace
);
234 unsigned NewRootLatency
= getLatency(Root
, NewRoot
, BlockTrace
);
236 // Get depth, latency and slack of Root
237 unsigned RootDepth
= BlockTrace
.getInstrCycles(Root
).Depth
;
238 unsigned RootLatency
= TSchedModel
.computeInstrLatency(Root
);
239 unsigned RootSlack
= BlockTrace
.getInstrSlack(Root
);
241 DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root
<< "\n";
242 dbgs() << " NewRootDepth: " << NewRootDepth
243 << " NewRootLatency: " << NewRootLatency
<< "\n";
244 dbgs() << " RootDepth: " << RootDepth
<< " RootLatency: " << RootLatency
245 << " RootSlack: " << RootSlack
<< "\n";
246 dbgs() << " NewRootDepth + NewRootLatency "
247 << NewRootDepth
+ NewRootLatency
<< "\n";
248 dbgs() << " RootDepth + RootLatency + RootSlack "
249 << RootDepth
+ RootLatency
+ RootSlack
<< "\n";);
251 /// True when the new sequence does not lenghten the critical path.
252 return ((NewRootDepth
+ NewRootLatency
) <=
253 (RootDepth
+ RootLatency
+ RootSlack
));
256 /// helper routine to convert instructions into SC
257 void MachineCombiner::instr2instrSC(
258 SmallVectorImpl
<MachineInstr
*> &Instrs
,
259 SmallVectorImpl
<const MCSchedClassDesc
*> &InstrsSC
) {
260 for (auto *InstrPtr
: Instrs
) {
261 unsigned Opc
= InstrPtr
->getOpcode();
262 unsigned Idx
= TII
->get(Opc
).getSchedClass();
263 const MCSchedClassDesc
*SC
= SchedModel
.getSchedClassDesc(Idx
);
264 InstrsSC
.push_back(SC
);
267 /// preservesResourceLen - True when the new instructions do not increase
269 bool MachineCombiner::preservesResourceLen(
270 MachineBasicBlock
*MBB
, MachineTraceMetrics::Trace BlockTrace
,
271 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
272 SmallVectorImpl
<MachineInstr
*> &DelInstrs
) {
274 // Compute current resource length
276 //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
277 SmallVector
<const MachineBasicBlock
*, 1> MBBarr
;
278 MBBarr
.push_back(MBB
);
279 unsigned ResLenBeforeCombine
= BlockTrace
.getResourceLength(MBBarr
);
281 // Deal with SC rather than Instructions.
282 SmallVector
<const MCSchedClassDesc
*, 16> InsInstrsSC
;
283 SmallVector
<const MCSchedClassDesc
*, 16> DelInstrsSC
;
285 instr2instrSC(InsInstrs
, InsInstrsSC
);
286 instr2instrSC(DelInstrs
, DelInstrsSC
);
288 ArrayRef
<const MCSchedClassDesc
*> MSCInsArr
= makeArrayRef(InsInstrsSC
);
289 ArrayRef
<const MCSchedClassDesc
*> MSCDelArr
= makeArrayRef(DelInstrsSC
);
291 // Compute new resource length
292 unsigned ResLenAfterCombine
=
293 BlockTrace
.getResourceLength(MBBarr
, MSCInsArr
, MSCDelArr
);
295 DEBUG(dbgs() << "RESOURCE DATA: \n";
296 dbgs() << " resource len before: " << ResLenBeforeCombine
297 << " after: " << ResLenAfterCombine
<< "\n";);
299 return ResLenAfterCombine
<= ResLenBeforeCombine
;
302 /// \returns true when new instruction sequence should be generated
303 /// independent if it lenghtens critical path or not
304 bool MachineCombiner::doSubstitute(unsigned NewSize
, unsigned OldSize
) {
305 if (OptSize
&& (NewSize
< OldSize
))
307 if (!TSchedModel
.hasInstrSchedModel())
312 /// combineInstructions - substitute a slow code sequence with a faster one by
313 /// evaluating instruction combining pattern.
314 /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
315 /// combining based on machine trace metrics. Only combine a sequence of
316 /// instructions when this neither lengthens the critical path nor increases
317 /// resource pressure. When optimizing for codesize always combine when the new
318 /// sequence is shorter.
319 bool MachineCombiner::combineInstructions(MachineBasicBlock
*MBB
) {
320 bool Changed
= false;
321 DEBUG(dbgs() << "Combining MBB " << MBB
->getName() << "\n");
323 auto BlockIter
= MBB
->begin();
325 while (BlockIter
!= MBB
->end()) {
326 auto &MI
= *BlockIter
++;
328 DEBUG(dbgs() << "INSTR "; MI
.dump(); dbgs() << "\n";);
329 SmallVector
<MachineCombinerPattern::MC_PATTERN
, 16> Pattern
;
330 // The motivating example is:
332 // MUL Other MUL_op1 MUL_op2 Other
334 // ADD/SUB => MADD/MSUB
335 // (=Root) (=NewRoot)
337 // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
338 // usually beneficial for code size it unfortunately can hurt performance
339 // when the ADD is on the critical path, but the MUL is not. With the
340 // substitution the MUL becomes part of the critical path (in form of the
341 // MADD) and can lengthen it on architectures where the MADD latency is
342 // longer than the ADD latency.
344 // For each instruction we check if it can be the root of a combiner
345 // pattern. Then for each pattern the new code sequence in form of MI is
346 // generated and evaluated. When the efficiency criteria (don't lengthen
347 // critical path, don't use more resources) is met the new sequence gets
348 // hooked up into the basic block before the old sequence is removed.
350 // The algorithm does not try to evaluate all patterns and pick the best.
351 // This is only an artificial restriction though. In practice there is
352 // mostly one pattern and hasPattern() can order patterns based on an
353 // internal cost heuristic.
355 if (TII
->hasPattern(MI
, Pattern
)) {
356 for (auto P
: Pattern
) {
357 SmallVector
<MachineInstr
*, 16> InsInstrs
;
358 SmallVector
<MachineInstr
*, 16> DelInstrs
;
359 DenseMap
<unsigned, unsigned> InstrIdxForVirtReg
;
361 MinInstr
= Traces
->getEnsemble(MachineTraceMetrics::TS_MinInstrCount
);
362 MachineTraceMetrics::Trace BlockTrace
= MinInstr
->getTrace(MBB
);
363 Traces
->verifyAnalysis();
364 TII
->genAlternativeCodeSequence(MI
, P
, InsInstrs
, DelInstrs
,
366 // Found pattern, but did not generate alternative sequence.
367 // This can happen e.g. when an immediate could not be materialized
368 // in a single instruction.
369 if (!InsInstrs
.size())
371 // Substitute when we optimize for codesize and the new sequence has
372 // fewer instructions OR
373 // the new sequence neither lenghten the critical path nor increases
374 // resource pressure.
375 if (doSubstitute(InsInstrs
.size(), DelInstrs
.size()) ||
376 (preservesCriticalPathLen(MBB
, &MI
, BlockTrace
, InsInstrs
,
377 InstrIdxForVirtReg
) &&
378 preservesResourceLen(MBB
, BlockTrace
, InsInstrs
, DelInstrs
))) {
379 for (auto *InstrPtr
: InsInstrs
)
380 MBB
->insert((MachineBasicBlock::iterator
) & MI
,
381 (MachineInstr
*)InstrPtr
);
382 for (auto *InstrPtr
: DelInstrs
)
383 InstrPtr
->eraseFromParentAndMarkDBGValuesForRemoval();
388 Traces
->invalidate(MBB
);
389 Traces
->verifyAnalysis();
390 // Eagerly stop after the first pattern fired
393 // Cleanup instructions of the alternative code sequence. There is no
395 for (auto *InstrPtr
: InsInstrs
) {
396 MachineFunction
*MF
= MBB
->getParent();
397 MF
->DeleteMachineInstr((MachineInstr
*)InstrPtr
);
400 InstrIdxForVirtReg
.clear();
408 bool MachineCombiner::runOnMachineFunction(MachineFunction
&MF
) {
409 const TargetSubtargetInfo
&STI
=
410 MF
.getTarget().getSubtarget
<TargetSubtargetInfo
>();
411 TII
= STI
.getInstrInfo();
412 TRI
= STI
.getRegisterInfo();
413 SchedModel
= STI
.getSchedModel();
414 TSchedModel
.init(SchedModel
, &STI
, TII
);
415 MRI
= &MF
.getRegInfo();
416 Traces
= &getAnalysis
<MachineTraceMetrics
>();
419 OptSize
= MF
.getFunction()->getAttributes().hasAttribute(
420 AttributeSet::FunctionIndex
, Attribute::OptimizeForSize
);
422 DEBUG(dbgs() << getPassName() << ": " << MF
.getName() << '\n');
423 if (!TII
->useMachineCombiner()) {
424 DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
428 bool Changed
= false;
430 // Try to combine instructions.
432 Changed
|= combineInstructions(&MBB
);