// Source: src/llvm/lib/Target/R600/SIInsertWaits.cpp (rustc bundled LLVM)
//===-- SIInsertWaits.cpp - Insert Wait Instructions ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Insert wait instructions for memory reads and writes.
///
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
//
//===----------------------------------------------------------------------===//
20 #include "AMDGPUSubtarget.h"
21 #include "SIDefines.h"
22 #include "SIInstrInfo.h"
23 #include "SIMachineFunctionInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 /// \brief One variable for each of the hardware counters
50 typedef Counters RegCounters
[512];
51 typedef std::pair
<unsigned, unsigned> RegInterval
;
53 class SIInsertWaits
: public MachineFunctionPass
{
57 const SIInstrInfo
*TII
;
58 const SIRegisterInfo
*TRI
;
59 const MachineRegisterInfo
*MRI
;
61 /// \brief Constant hardware limits
62 static const Counters WaitCounts
;
64 /// \brief Constant zero value
65 static const Counters ZeroCounts
;
67 /// \brief Counter values we have already waited on.
70 /// \brief Counter values for last instruction issued.
73 /// \brief Registers used by async instructions.
76 /// \brief Registers defined by async instructions.
77 RegCounters DefinedRegs
;
79 /// \brief Different export instruction types seen since last wait.
80 unsigned ExpInstrTypesSeen
;
82 /// \brief Type of the last opcode.
83 InstType LastOpcodeType
;
85 /// \brief Get increment/decrement amount for this instruction.
86 Counters
getHwCounts(MachineInstr
&MI
);
88 /// \brief Is operand relevant for async execution?
89 bool isOpRelevant(MachineOperand
&Op
);
91 /// \brief Get register interval an operand affects.
92 RegInterval
getRegInterval(MachineOperand
&Op
);
94 /// \brief Handle instructions async components
95 void pushInstruction(MachineBasicBlock
&MBB
,
96 MachineBasicBlock::iterator I
);
98 /// \brief Insert the actual wait instruction
99 bool insertWait(MachineBasicBlock
&MBB
,
100 MachineBasicBlock::iterator I
,
101 const Counters
&Counts
);
103 /// \brief Do we need def2def checks?
104 bool unorderedDefines(MachineInstr
&MI
);
106 /// \brief Resolve all operand dependencies to counter requirements
107 Counters
handleOperands(MachineInstr
&MI
);
110 SIInsertWaits(TargetMachine
&tm
) :
111 MachineFunctionPass(ID
),
114 ExpInstrTypesSeen(0) { }
116 bool runOnMachineFunction(MachineFunction
&MF
) override
;
118 const char *getPassName() const override
{
119 return "SI insert wait instructions";
124 } // End anonymous namespace
126 char SIInsertWaits::ID
= 0;
128 const Counters
SIInsertWaits::WaitCounts
= { { 15, 7, 7 } };
129 const Counters
SIInsertWaits::ZeroCounts
= { { 0, 0, 0 } };
131 FunctionPass
*llvm::createSIInsertWaits(TargetMachine
&tm
) {
132 return new SIInsertWaits(tm
);
135 Counters
SIInsertWaits::getHwCounts(MachineInstr
&MI
) {
137 uint64_t TSFlags
= TII
->get(MI
.getOpcode()).TSFlags
;
140 Result
.Named
.VM
= !!(TSFlags
& SIInstrFlags::VM_CNT
);
142 // Only consider stores or EXP for EXP_CNT
143 Result
.Named
.EXP
= !!(TSFlags
& SIInstrFlags::EXP_CNT
&&
144 (MI
.getOpcode() == AMDGPU::EXP
|| MI
.getDesc().mayStore()));
146 // LGKM may uses larger values
147 if (TSFlags
& SIInstrFlags::LGKM_CNT
) {
149 if (TII
->isSMRD(MI
.getOpcode())) {
151 MachineOperand
&Op
= MI
.getOperand(0);
152 assert(Op
.isReg() && "First LGKM operand must be a register!");
154 unsigned Reg
= Op
.getReg();
155 unsigned Size
= TRI
->getMinimalPhysRegClass(Reg
)->getSize();
156 Result
.Named
.LGKM
= Size
> 4 ? 2 : 1;
160 Result
.Named
.LGKM
= 1;
164 Result
.Named
.LGKM
= 0;
170 bool SIInsertWaits::isOpRelevant(MachineOperand
&Op
) {
172 // Constants are always irrelevant
176 // Defines are always relevant
180 // For exports all registers are relevant
181 MachineInstr
&MI
= *Op
.getParent();
182 if (MI
.getOpcode() == AMDGPU::EXP
)
185 // For stores the stored value is also relevant
186 if (!MI
.getDesc().mayStore())
189 // Check if this operand is the value being stored.
190 // Special case for DS instructions, since the address
191 // operand comes before the value operand and it may have
192 // multiple data operands.
194 if (TII
->isDS(MI
.getOpcode())) {
195 MachineOperand
*Data
= TII
->getNamedOperand(MI
, AMDGPU::OpName::data
);
196 if (Data
&& Op
.isIdenticalTo(*Data
))
199 MachineOperand
*Data0
= TII
->getNamedOperand(MI
, AMDGPU::OpName::data0
);
200 if (Data0
&& Op
.isIdenticalTo(*Data0
))
203 MachineOperand
*Data1
= TII
->getNamedOperand(MI
, AMDGPU::OpName::data1
);
204 if (Data1
&& Op
.isIdenticalTo(*Data1
))
210 // NOTE: This assumes that the value operand is before the
211 // address operand, and that there is only one value operand.
212 for (MachineInstr::mop_iterator I
= MI
.operands_begin(),
213 E
= MI
.operands_end(); I
!= E
; ++I
) {
215 if (I
->isReg() && I
->isUse())
216 return Op
.isIdenticalTo(*I
);
222 RegInterval
SIInsertWaits::getRegInterval(MachineOperand
&Op
) {
224 if (!Op
.isReg() || !TRI
->isInAllocatableClass(Op
.getReg()))
225 return std::make_pair(0, 0);
227 unsigned Reg
= Op
.getReg();
228 unsigned Size
= TRI
->getMinimalPhysRegClass(Reg
)->getSize();
233 Result
.first
= TRI
->getEncodingValue(Reg
);
234 Result
.second
= Result
.first
+ Size
/ 4;
239 void SIInsertWaits::pushInstruction(MachineBasicBlock
&MBB
,
240 MachineBasicBlock::iterator I
) {
242 // Get the hardware counter increments and sum them up
243 Counters Increment
= getHwCounts(*I
);
246 for (unsigned i
= 0; i
< 3; ++i
) {
247 LastIssued
.Array
[i
] += Increment
.Array
[i
];
248 Sum
+= Increment
.Array
[i
];
251 // If we don't increase anything then that's it
253 LastOpcodeType
= OTHER
;
257 if (TRI
->ST
.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS
) {
258 // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
259 // or SMEM clause, respectively.
261 // The temporary workaround is to break the clauses with S_NOP.
263 // The proper solution would be to allocate registers such that all source
264 // and destination registers don't overlap, e.g. this is illegal:
267 if ((LastOpcodeType
== SMEM
&& TII
->isSMRD(I
->getOpcode())) ||
268 (LastOpcodeType
== VMEM
&& Increment
.Named
.VM
)) {
269 // Insert a NOP to break the clause.
270 BuildMI(MBB
, I
, DebugLoc(), TII
->get(AMDGPU::S_NOP
))
274 if (TII
->isSMRD(I
->getOpcode()))
275 LastOpcodeType
= SMEM
;
276 else if (Increment
.Named
.VM
)
277 LastOpcodeType
= VMEM
;
280 // Remember which export instructions we have seen
281 if (Increment
.Named
.EXP
) {
282 ExpInstrTypesSeen
|= I
->getOpcode() == AMDGPU::EXP
? 1 : 2;
285 for (unsigned i
= 0, e
= I
->getNumOperands(); i
!= e
; ++i
) {
287 MachineOperand
&Op
= I
->getOperand(i
);
288 if (!isOpRelevant(Op
))
291 RegInterval Interval
= getRegInterval(Op
);
292 for (unsigned j
= Interval
.first
; j
< Interval
.second
; ++j
) {
294 // Remember which registers we define
296 DefinedRegs
[j
] = LastIssued
;
298 // and which one we are using
300 UsedRegs
[j
] = LastIssued
;
305 bool SIInsertWaits::insertWait(MachineBasicBlock
&MBB
,
306 MachineBasicBlock::iterator I
,
307 const Counters
&Required
) {
309 // End of program? No need to wait on anything
310 if (I
!= MBB
.end() && I
->getOpcode() == AMDGPU::S_ENDPGM
)
313 // Figure out if the async instructions execute in order
316 // VM_CNT is always ordered
319 // EXP_CNT is unordered if we have both EXP & VM-writes
320 Ordered
[1] = ExpInstrTypesSeen
== 3;
322 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
325 // The values we are going to put into the S_WAITCNT instruction
326 Counters Counts
= WaitCounts
;
328 // Do we really need to wait?
329 bool NeedWait
= false;
331 for (unsigned i
= 0; i
< 3; ++i
) {
333 if (Required
.Array
[i
] <= WaitedOn
.Array
[i
])
339 unsigned Value
= LastIssued
.Array
[i
] - Required
.Array
[i
];
341 // Adjust the value to the real hardware possibilities.
342 Counts
.Array
[i
] = std::min(Value
, WaitCounts
.Array
[i
]);
347 // Remember on what we have waited on.
348 WaitedOn
.Array
[i
] = LastIssued
.Array
[i
] - Counts
.Array
[i
];
354 // Reset EXP_CNT instruction types
355 if (Counts
.Named
.EXP
== 0)
356 ExpInstrTypesSeen
= 0;
358 // Build the wait instruction
359 BuildMI(MBB
, I
, DebugLoc(), TII
->get(AMDGPU::S_WAITCNT
))
360 .addImm((Counts
.Named
.VM
& 0xF) |
361 ((Counts
.Named
.EXP
& 0x7) << 4) |
362 ((Counts
.Named
.LGKM
& 0x7) << 8));
364 LastOpcodeType
= OTHER
;
368 /// \brief helper function for handleOperands
369 static void increaseCounters(Counters
&Dst
, const Counters
&Src
) {
371 for (unsigned i
= 0; i
< 3; ++i
)
372 Dst
.Array
[i
] = std::max(Dst
.Array
[i
], Src
.Array
[i
]);
375 Counters
SIInsertWaits::handleOperands(MachineInstr
&MI
) {
377 Counters Result
= ZeroCounts
;
379 // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
380 // but we also want to wait for any other outstanding transfers before
381 // signalling other hardware blocks
382 if (MI
.getOpcode() == AMDGPU::S_SENDMSG
)
385 // For each register affected by this
386 // instruction increase the result sequence
387 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
389 MachineOperand
&Op
= MI
.getOperand(i
);
390 RegInterval Interval
= getRegInterval(Op
);
391 for (unsigned j
= Interval
.first
; j
< Interval
.second
; ++j
) {
394 increaseCounters(Result
, UsedRegs
[j
]);
395 increaseCounters(Result
, DefinedRegs
[j
]);
399 increaseCounters(Result
, DefinedRegs
[j
]);
406 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
407 // around other non-memory instructions.
408 bool SIInsertWaits::runOnMachineFunction(MachineFunction
&MF
) {
409 bool Changes
= false;
411 TII
= static_cast<const SIInstrInfo
*>(MF
.getSubtarget().getInstrInfo());
413 static_cast<const SIRegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
415 MRI
= &MF
.getRegInfo();
417 WaitedOn
= ZeroCounts
;
418 LastIssued
= ZeroCounts
;
419 LastOpcodeType
= OTHER
;
421 memset(&UsedRegs
, 0, sizeof(UsedRegs
));
422 memset(&DefinedRegs
, 0, sizeof(DefinedRegs
));
424 for (MachineFunction::iterator BI
= MF
.begin(), BE
= MF
.end();
427 MachineBasicBlock
&MBB
= *BI
;
428 for (MachineBasicBlock::iterator I
= MBB
.begin(), E
= MBB
.end();
431 // Wait for everything before a barrier.
432 if (I
->getOpcode() == AMDGPU::S_BARRIER
)
433 Changes
|= insertWait(MBB
, I
, LastIssued
);
435 Changes
|= insertWait(MBB
, I
, handleOperands(*I
));
436 pushInstruction(MBB
, I
);
439 // Wait for everything at the end of the MBB
440 Changes
|= insertWait(MBB
, MBB
.getFirstTerminator(), LastIssued
);