]>
git.proxmox.com Git - rustc.git/blob - src/llvm/lib/Target/ARM/MLxExpansionPass.cpp
1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11 // multiply and add / sub instructions) when special VMLx hazards are detected.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMSubtarget.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Target/TargetRegisterInfo.h"
30 #define DEBUG_TYPE "mlx-expansion"
33 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden
);
34 static cl::opt
<unsigned>
35 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden
);
37 STATISTIC(NumExpand
, "Number of fp MLA / MLS instructions expanded");
40 struct MLxExpansion
: public MachineFunctionPass
{
42 MLxExpansion() : MachineFunctionPass(ID
) {}
44 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
46 const char *getPassName() const override
{
47 return "ARM MLA / MLS expansion pass";
51 const ARMBaseInstrInfo
*TII
;
52 const TargetRegisterInfo
*TRI
;
53 MachineRegisterInfo
*MRI
;
58 MachineInstr
* LastMIs
[4];
59 SmallPtrSet
<MachineInstr
*, 4> IgnoreStall
;
62 void pushStack(MachineInstr
*MI
);
63 MachineInstr
*getAccDefMI(MachineInstr
*MI
) const;
64 unsigned getDefReg(MachineInstr
*MI
) const;
65 bool hasLoopHazard(MachineInstr
*MI
) const;
66 bool hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const;
67 bool FindMLxHazard(MachineInstr
*MI
);
68 void ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
69 unsigned MulOpc
, unsigned AddSubOpc
,
70 bool NegAcc
, bool HasLane
);
71 bool ExpandFPMLxInstructions(MachineBasicBlock
&MBB
);
73 char MLxExpansion::ID
= 0;
76 void MLxExpansion::clearStack() {
77 std::fill(LastMIs
, LastMIs
+ 4, nullptr);
81 void MLxExpansion::pushStack(MachineInstr
*MI
) {
87 MachineInstr
*MLxExpansion::getAccDefMI(MachineInstr
*MI
) const {
88 // Look past COPY and INSERT_SUBREG instructions to find the
89 // real definition MI. This is important for _sfp instructions.
90 unsigned Reg
= MI
->getOperand(1).getReg();
91 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
94 MachineBasicBlock
*MBB
= MI
->getParent();
95 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
97 if (DefMI
->getParent() != MBB
)
99 if (DefMI
->isCopyLike()) {
100 Reg
= DefMI
->getOperand(1).getReg();
101 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
102 DefMI
= MRI
->getVRegDef(Reg
);
105 } else if (DefMI
->isInsertSubreg()) {
106 Reg
= DefMI
->getOperand(2).getReg();
107 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
108 DefMI
= MRI
->getVRegDef(Reg
);
117 unsigned MLxExpansion::getDefReg(MachineInstr
*MI
) const {
118 unsigned Reg
= MI
->getOperand(0).getReg();
119 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
120 !MRI
->hasOneNonDBGUse(Reg
))
123 MachineBasicBlock
*MBB
= MI
->getParent();
124 MachineInstr
*UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
125 if (UseMI
->getParent() != MBB
)
128 while (UseMI
->isCopy() || UseMI
->isInsertSubreg()) {
129 Reg
= UseMI
->getOperand(0).getReg();
130 if (TargetRegisterInfo::isPhysicalRegister(Reg
) ||
131 !MRI
->hasOneNonDBGUse(Reg
))
133 UseMI
= &*MRI
->use_instr_nodbg_begin(Reg
);
134 if (UseMI
->getParent() != MBB
)
141 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
142 /// a single-MBB loop.
143 bool MLxExpansion::hasLoopHazard(MachineInstr
*MI
) const {
144 unsigned Reg
= MI
->getOperand(1).getReg();
145 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
148 MachineBasicBlock
*MBB
= MI
->getParent();
149 MachineInstr
*DefMI
= MRI
->getVRegDef(Reg
);
152 if (DefMI
->getParent() != MBB
)
155 if (DefMI
->isPHI()) {
156 for (unsigned i
= 1, e
= DefMI
->getNumOperands(); i
< e
; i
+= 2) {
157 if (DefMI
->getOperand(i
+ 1).getMBB() == MBB
) {
158 unsigned SrcReg
= DefMI
->getOperand(i
).getReg();
159 if (TargetRegisterInfo::isVirtualRegister(SrcReg
)) {
160 DefMI
= MRI
->getVRegDef(SrcReg
);
165 } else if (DefMI
->isCopyLike()) {
166 Reg
= DefMI
->getOperand(1).getReg();
167 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
168 DefMI
= MRI
->getVRegDef(Reg
);
171 } else if (DefMI
->isInsertSubreg()) {
172 Reg
= DefMI
->getOperand(2).getReg();
173 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
174 DefMI
= MRI
->getVRegDef(Reg
);
185 bool MLxExpansion::hasRAWHazard(unsigned Reg
, MachineInstr
*MI
) const {
186 // FIXME: Detect integer instructions properly.
187 const MCInstrDesc
&MCID
= MI
->getDesc();
188 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
191 unsigned Opcode
= MCID
.getOpcode();
192 if (Opcode
== ARM::VMOVRS
|| Opcode
== ARM::VMOVRRD
)
194 if ((Domain
& ARMII::DomainVFP
) || (Domain
& ARMII::DomainNEON
))
195 return MI
->readsRegister(Reg
, TRI
);
199 static bool isFpMulInstruction(unsigned Opcode
) {
213 bool MLxExpansion::FindMLxHazard(MachineInstr
*MI
) {
214 if (NumExpand
>= ExpandLimit
)
220 MachineInstr
*DefMI
= getAccDefMI(MI
);
221 if (TII
->isFpMLxInstruction(DefMI
->getOpcode())) {
223 // r3 = vmla r0, r1, r2
224 // takes 16 - 17 cycles
229 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
230 IgnoreStall
.insert(DefMI
);
234 // On Swift, we mostly care about hazards from multiplication instructions
235 // writing the accumulator and the pipelining of loop iterations by out-of-
238 return isFpMulInstruction(DefMI
->getOpcode()) || hasLoopHazard(MI
);
240 if (IgnoreStall
.count(MI
))
243 // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
244 // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
245 // preserves the in-order retirement of the instructions.
246 // Look at the next few instructions, if *most* of them can cause hazards,
247 // then the scheduler can't *fix* this, we'd better break up the VMLA.
248 unsigned Limit1
= isLikeA9
? 1 : 4;
249 unsigned Limit2
= isLikeA9
? 1 : 4;
250 for (unsigned i
= 1; i
<= 4; ++i
) {
251 int Idx
= ((int)MIIdx
- i
+ 4) % 4;
252 MachineInstr
*NextMI
= LastMIs
[Idx
];
256 if (TII
->canCauseFpMLxStall(NextMI
->getOpcode())) {
261 // Look for VMLx RAW hazard.
262 if (i
<= Limit2
&& hasRAWHazard(getDefReg(MI
), NextMI
))
269 /// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
270 /// of MUL + ADD / SUB instructions.
272 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
273 unsigned MulOpc
, unsigned AddSubOpc
,
274 bool NegAcc
, bool HasLane
) {
275 unsigned DstReg
= MI
->getOperand(0).getReg();
276 bool DstDead
= MI
->getOperand(0).isDead();
277 unsigned AccReg
= MI
->getOperand(1).getReg();
278 unsigned Src1Reg
= MI
->getOperand(2).getReg();
279 unsigned Src2Reg
= MI
->getOperand(3).getReg();
280 bool Src1Kill
= MI
->getOperand(2).isKill();
281 bool Src2Kill
= MI
->getOperand(3).isKill();
282 unsigned LaneImm
= HasLane
? MI
->getOperand(4).getImm() : 0;
283 unsigned NextOp
= HasLane
? 5 : 4;
284 ARMCC::CondCodes Pred
= (ARMCC::CondCodes
)MI
->getOperand(NextOp
).getImm();
285 unsigned PredReg
= MI
->getOperand(++NextOp
).getReg();
287 const MCInstrDesc
&MCID1
= TII
->get(MulOpc
);
288 const MCInstrDesc
&MCID2
= TII
->get(AddSubOpc
);
289 const MachineFunction
&MF
= *MI
->getParent()->getParent();
290 unsigned TmpReg
= MRI
->createVirtualRegister(
291 TII
->getRegClass(MCID1
, 0, TRI
, MF
));
293 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID1
, TmpReg
)
294 .addReg(Src1Reg
, getKillRegState(Src1Kill
))
295 .addReg(Src2Reg
, getKillRegState(Src2Kill
));
298 MIB
.addImm(Pred
).addReg(PredReg
);
300 MIB
= BuildMI(MBB
, MI
, MI
->getDebugLoc(), MCID2
)
301 .addReg(DstReg
, getDefRegState(true) | getDeadRegState(DstDead
));
304 bool AccKill
= MRI
->hasOneNonDBGUse(AccReg
);
305 MIB
.addReg(TmpReg
, getKillRegState(true))
306 .addReg(AccReg
, getKillRegState(AccKill
));
308 MIB
.addReg(AccReg
).addReg(TmpReg
, getKillRegState(true));
310 MIB
.addImm(Pred
).addReg(PredReg
);
313 dbgs() << "Expanding: " << *MI
;
315 MachineBasicBlock::iterator MII
= MI
;
316 MII
= std::prev(MII
);
317 MachineInstr
&MI2
= *MII
;
318 MII
= std::prev(MII
);
319 MachineInstr
&MI1
= *MII
;
320 dbgs() << " " << MI1
;
321 dbgs() << " " << MI2
;
324 MI
->eraseFromParent();
328 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock
&MBB
) {
329 bool Changed
= false;
335 MachineBasicBlock::reverse_iterator MII
= MBB
.rbegin(), E
= MBB
.rend();
337 MachineInstr
*MI
= &*MII
;
339 if (MI
->isPosition() || MI
->isImplicitDef() || MI
->isCopy()) {
344 const MCInstrDesc
&MCID
= MI
->getDesc();
345 if (MI
->isBarrier()) {
352 unsigned Domain
= MCID
.TSFlags
& ARMII::DomainMask
;
353 if (Domain
== ARMII::DomainGeneral
) {
355 // Assume dual issues of non-VFP / NEON instructions.
360 unsigned MulOpc
, AddSubOpc
;
361 bool NegAcc
, HasLane
;
362 if (!TII
->isFpMLxInstruction(MCID
.getOpcode(),
363 MulOpc
, AddSubOpc
, NegAcc
, HasLane
) ||
367 ExpandFPMLxInstruction(MBB
, MI
, MulOpc
, AddSubOpc
, NegAcc
, HasLane
);
368 E
= MBB
.rend(); // May have changed if MI was the 1st instruction.
380 bool MLxExpansion::runOnMachineFunction(MachineFunction
&Fn
) {
381 TII
= static_cast<const ARMBaseInstrInfo
*>(Fn
.getSubtarget().getInstrInfo());
382 TRI
= Fn
.getSubtarget().getRegisterInfo();
383 MRI
= &Fn
.getRegInfo();
384 const ARMSubtarget
*STI
= &Fn
.getTarget().getSubtarget
<ARMSubtarget
>();
385 isLikeA9
= STI
->isLikeA9() || STI
->isSwift();
386 isSwift
= STI
->isSwift();
388 bool Modified
= false;
389 for (MachineBasicBlock
&MBB
: Fn
)
390 Modified
|= ExpandFPMLxInstructions(MBB
);
395 FunctionPass
*llvm::createMLxExpansionPass() {
396 return new MLxExpansion();