//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI(st) {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//
static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}
/// \brief Returns true if both nodes have the same value for the given
///        operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // to the real operands.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluded
    // st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;
    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}
static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}
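// Note on the DS forms handled below: the offset0/offset1 fields of the
// read2/write2 variants are expressed in element-size units (and the ST64
// variants additionally scale the stride by 64), so getLdStBaseRegImmOfs
// converts them back to a byte offset before reporting it.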
bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
                                       unsigned &BaseReg, unsigned &Offset,
                                       const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();
  if (isDS(Opc)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive.
    // We will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();
    assert(Offset1 > Offset0);

    if (Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt->mayLoad())
        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
      else {
        assert(LdSt->mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(Opc) || isMTBUF(Opc)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(Opc)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}
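// Used by the machine scheduler's load-clustering heuristic for pairs of
// loads whose base and offset were identified by getLdStBaseRegImmOfs above;
// clustering is simply allowed for loads of the same kind (DS with DS,
// SMRD with SMRD, buffer with buffer).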
bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                     MachineInstr *SecondLdSt,
                                     unsigned NumLoads) const {
  unsigned Opc0 = FirstLdSt->getOpcode();
  unsigned Opc1 = SecondLdSt->getOpcode();

  // TODO: This needs finer tuning
  if (NumLoads > 4)
    return false;

  if (isDS(Opc0) && isDS(Opc1))
    return true;

  if (isSMRD(Opc0) && isSMRD(Opc1))
    return true;

  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
    return true;

  return false;
}
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}
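// As an illustration of the loop above: copying one SReg_128 register to
// another expands to four S_MOV_B32 instructions, one per sub0..sub3
// subregister, with the destination super-register added as an implicit def
// on every copy except the last.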
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {

  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }

  return AMDGPU::COPY;
}
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  int Opcode = -1;

  if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling SGPRs.
    switch (RC->getSize() * 8) {
      case 32:  Opcode = AMDGPU::SI_SPILL_S32_SAVE;  break;
      case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
      case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
      case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
      case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    }
  } else if (RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
    MFI->setHasSpilledVGPRs();

    switch (RC->getSize() * 8) {
      case 32:  Opcode = AMDGPU::SI_SPILL_V32_SAVE;  break;
      case 64:  Opcode = AMDGPU::SI_SPILL_V64_SAVE;  break;
      case 96:  Opcode = AMDGPU::SI_SPILL_V96_SAVE;  break;
      case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
      case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
      case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
    }
  }

  if (Opcode != -1) {
    FrameInfo->setObjectAlignment(FrameIndex, 4);
    BuildMI(MBB, MI, DL, get(Opcode))
            .addReg(SrcReg)
            .addFrameIndex(FrameIndex)
            // Place-holder registers, these will be filled in by
            // SIPrepareScratchRegs.
            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
            .addReg(AMDGPU::SGPR0, RegState::Undef);
  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
            .addReg(SrcReg);
  }
}
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  int Opcode = -1;

  if (RI.isSGPRClass(RC)) {
    switch (RC->getSize() * 8) {
      case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE;  break;
      case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
      case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
      case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
      case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    }
  } else if (RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
    switch (RC->getSize() * 8) {
      case 32:  Opcode = AMDGPU::SI_SPILL_V32_RESTORE;  break;
      case 64:  Opcode = AMDGPU::SI_SPILL_V64_RESTORE;  break;
      case 96:  Opcode = AMDGPU::SI_SPILL_V96_RESTORE;  break;
      case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
      case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
      case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
    }
  }

  if (Opcode != -1) {
    FrameInfo->setObjectAlignment(FrameIndex, 4);
    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
            .addFrameIndex(FrameIndex)
            // Place-holder registers, these will be filled in by
            // SIPrepareScratchRegs.
            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
            .addReg(AMDGPU::SGPR0, RegState::Undef);
  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
  }
}
/// \param @Offset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
      unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
      unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
      static const unsigned TIDIGRegs[3] = {
        TIDIGXReg, TIDIGYReg, TIDIGZReg
      };
      for (unsigned Reg : TIDIGRegs) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
              .addReg(InputPtrReg)
              .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
              .addReg(InputPtrReg)
              .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
              .addReg(STmp1)
              .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
              .addReg(STmp1)
              .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
              .addReg(STmp0)
              .addReg(TIDIGYReg)
              .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
              .addReg(TIDReg)
              .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
              .addImm(-1)
              .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
              .addImm(-1)
              .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
            .addImm(2)
            .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
          .addImm(LDSOffset)
          .addReg(TIDReg);

  return TmpReg;
}
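// The address returned in TmpReg is therefore MFI->LDSSize plus
// FrameOffset * WorkGroupSize, with the per-lane TID offset computed above
// added in, so each lane of the workgroup gets its own LDS spill slot.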
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
            .addImm(Arg);
  }
}
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SI_CONSTDATA_PTR: {
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);

    // Add 32-bit offset from this instruction to the start of the constant data.
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
            .addReg(RegLo)
            .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
            .addReg(RegHi)
            .addImm(0)
            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
            .addReg(AMDGPU::SCC, RegState::Implicit);
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
              .addImm(Imm.getLoBits(32).getZExtValue())
              .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
              .addImm(Imm.getHiBits(32).getZExtValue())
              .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
              .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
              .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
              .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
              .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  if (MI->getNumOperands() < 3)
    return nullptr;

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  assert(Src0Idx != -1 && "Should always have src0 operand");

  MachineOperand &Src0 = MI->getOperand(Src0Idx);
  if (!Src0.isReg())
    return nullptr;

  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return nullptr;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  // Make sure it's legal to commute operands for VOP2.
  if (isVOP2(MI->getOpcode()) &&
      (!isOperandLegal(MI, Src0Idx, &Src1) ||
       !isOperandLegal(MI, Src1Idx, &Src0))) {
    return nullptr;
  }

  if (!Src1.isReg()) {
    // Allow commuting instructions with Imm operands.
    if (NewMI || !Src1.isImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // Be sure to copy the source modifiers to the right place.
    if (MachineOperand *Src0Mods
          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      MachineOperand *Src1Mods
        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);

      int Src0ModsVal = Src0Mods->getImm();
      if (!Src1Mods && Src0ModsVal != 0)
        return nullptr;

      // XXX - This assert might be a lie. It might be useful to have a neg
      // modifier with 0.0.
      int Src1ModsVal = Src1Mods->getImm();
      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

      Src1Mods->setImm(Src0ModsVal);
      Src0Mods->setImm(Src1ModsVal);
    }

    unsigned Reg = Src0.getReg();
    unsigned SubReg = Src0.getSubReg();
    if (Src1.isImm())
      Src0.ChangeToImmediate(Src1.getImm());
    else
      llvm_unreachable("Should only have immediates");

    Src1.ChangeToRegister(Reg, false);
    Src1.setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}
// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
                                        unsigned &SrcOpIdx1,
                                        unsigned &SrcOpIdx2) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isCommutable())
    return false;

  unsigned Opc = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
  // immediate.
  if (!MI->getOperand(Src0Idx).isReg())
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  if (!MI->getOperand(Src1Idx).isReg())
    return false;

  // If any source modifiers are set, the generic instruction commuting won't
  // understand how to copy the source modifiers.
  if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
      hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
    return false;

  SrcOpIdx1 = Src0Idx;
  SrcOpIdx2 = Src1Idx;
  return true;
}
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}
bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
                                         AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
    return MI->getOperand(1).isImm();
  }
}
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}
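// For example, accesses at offsets 0 and 8 with widths 8 and 4 do not
// overlap (0 + 8 <= 8), while accesses at offsets 0 and 4 with widths 8 and 4
// do overlap (0 + 8 > 4).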
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, Offset0;
  unsigned BaseReg1, Offset1;

  if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  unsigned Opc0 = MIa->getOpcode();
  unsigned Opc1 = MIb->getOpcode();

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(Opc0)) {
    if (isDS(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1);
  }

  if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
    if (isMUBUF(Opc1) || isMTBUF(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isSMRD(Opc1);
  }

  if (isSMRD(Opc0)) {
    if (isSMRD(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
  }

  if (isFLAT(Opc0)) {
    if (isFLAT(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int64_t SVal = Imm.getSExtValue();
  if (SVal >= -16 && SVal <= 64)
    return true;

  if (Imm.getBitWidth() == 64) {
    uint64_t Val = Imm.getZExtValue();
    return (DoubleToBits(0.0) == Val) ||
           (DoubleToBits(1.0) == Val) ||
           (DoubleToBits(-1.0) == Val) ||
           (DoubleToBits(0.5) == Val) ||
           (DoubleToBits(-0.5) == Val) ||
           (DoubleToBits(2.0) == Val) ||
           (DoubleToBits(-2.0) == Val) ||
           (DoubleToBits(4.0) == Val) ||
           (DoubleToBits(-4.0) == Val);
  }

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = Imm.getZExtValue();

  return (FloatToBits(0.0f) == Val) ||
         (FloatToBits(1.0f) == Val) ||
         (FloatToBits(-1.0f) == Val) ||
         (FloatToBits(0.5f) == Val) ||
         (FloatToBits(-0.5f) == Val) ||
         (FloatToBits(2.0f) == Val) ||
         (FloatToBits(-2.0f) == Val) ||
         (FloatToBits(4.0f) == Val) ||
         (FloatToBits(-4.0f) == Val);
}
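// In other words: integers in [-16, 64] are inline constants, as are the
// values 0.0, +/-0.5, +/-1.0, +/-2.0 and +/-4.0 in either their 32-bit or
// 64-bit IEEE encodings.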
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm())
    return isInlineConstant(APInt(32, MO.getImm(), true));

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return MO.isImm() && !isInlineConstant(MO);
}
static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}
bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                                    const MachineOperand &MO) const {
  const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];

  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());

  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
    return true;

  if (OpInfo.RegClass < 0)
    return false;

  if (isLiteralConstant(MO))
    return RI.opCanUseLiteralConstant(OpInfo.OperandType);

  return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
, unsigned AS
) const {
1023 case AMDGPUAS::GLOBAL_ADDRESS
: {
1024 // MUBUF instructions a 12-bit offset in bytes.
1025 return isUInt
<12>(OffsetSize
);
1027 case AMDGPUAS::CONSTANT_ADDRESS
: {
1028 // SMRD instructions have an 8-bit offset in dwords on SI and
1029 // a 20-bit offset in bytes on VI.
1030 if (RI
.ST
.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS
)
1031 return isUInt
<20>(OffsetSize
);
1033 return (OffsetSize
% 4 == 0) && isUInt
<8>(OffsetSize
/ 4);
1035 case AMDGPUAS::LOCAL_ADDRESS
:
1036 case AMDGPUAS::REGION_ADDRESS
: {
1037 // The single offset versions have a 16-bit offset in bytes.
1038 return isUInt
<16>(OffsetSize
);
1040 case AMDGPUAS::PRIVATE_ADDRESS
:
1041 // Indirect register addressing does not use any offsets.
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
  int Op32 = AMDGPU::getVOPe32(Opcode);
  if (Op32 == -1)
    return false;

  return pseudoToMCOpcode(Op32) != -1;
}
bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
  // The src0_modifier operand is present on all instructions
  // that have modifiers.

  return AMDGPU::getNamedOperandIdx(Opcode,
                                    AMDGPU::OpName::src0_modifiers) != -1;
}

bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
                                  unsigned OpName) const {
  const MachineOperand *Mods = getNamedOperand(MI, OpName);
  return Mods && Mods->getImm();
}
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                  const MachineOperand &MO) const {
  // Literal constants use the constant bus.
  if (isLiteralConstant(MO))
    return true;

  if (!MO.isReg() || !MO.isUse())
    return false;

  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));

  // FLAT_SCR is just an SGPR pair.
  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
    return true;

  // EXEC register uses the constant bus.
  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
    return true;

  // SGPRs use the constant bus
  if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
      (!MO.isImplicit() &&
       (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
        AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
    return true;
  }

  return false;
}
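// A VOP* instruction may read at most one value over the constant bus (one
// SGPR or one literal constant); verifyInstruction below reports a violation
// when more than one such operand is present.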
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).isFPImm()) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
      return false;
    }

    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER:
      if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case AMDGPU::OPERAND_REG_IMM32:
      break;
    case AMDGPU::OPERAND_REG_INLINE_C:
      if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      // Check if this operand is an immediate.
      // FrameIndex operands will be replaced by immediates, so they are
      // allowed.
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    int RegClass = Desc.OpInfo[i].RegClass;
    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    // Only look at the true operands. Only a real operand can use the constant
    // bus, and we don't want to check pseudo-operands like the source modifier
    // flags.
    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int OpIdx : OpIndices) {
      if (OpIdx == -1)
        break;

      const MachineOperand &MO = MI->getOperand(OpIdx);
      if (usesConstantBus(MRI, MO)) {
        if (MO.isReg()) {
          if (MO.getReg() != SGPRUsed)
            ++ConstantBusCount;
          SGPRUsed = MO.getReg();
        } else {
          ++ConstantBusCount;
        }
      }
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI->getOperand(Src0Idx);
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    const MachineOperand &Src2 = MI->getOperand(Src2Idx);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  return true;
}
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  }
}
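// The S_LOAD_DWORD* entries map to BUFFER_LOAD_DWORD*_ADDR64 because
// moveSMRDToVALU below rewrites scalar memory loads into MUBUF loads when
// they have to be executed on the vector unit.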
bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1) {
    unsigned Reg = MI.getOperand(OpNo).getReg();

    if (TargetRegisterInfo::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return RI.getRegClass(Reg);
  }

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}
bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineBasicBlock *MBB = MI->getParent();
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg())
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = AMDGPU::S_MOV_B32;

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
    VRC = &AMDGPU::VReg_64RegClass;
  else
    VRC = &AMDGPU::VGPR_32RegClass;

  unsigned Reg = MRI.createVirtualRegister(VRC);
  DebugLoc DL = MBB->findDebugLoc(I);
  BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
          .addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}
unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
          .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
          .addReg(NewSuperReg, 0, SubIdx);

  return SubReg;
}
SIInstrInfo::buildExtractSubRegOrImm(
1387 MachineBasicBlock::iterator MII
,
1388 MachineRegisterInfo
&MRI
,
1390 const TargetRegisterClass
*SuperRC
,
1392 const TargetRegisterClass
*SubRC
) const {
1394 // XXX - Is there a better way to do this?
1395 if (SubIdx
== AMDGPU::sub0
)
1396 return MachineOperand::CreateImm(Op
.getImm() & 0xFFFFFFFF);
1397 if (SubIdx
== AMDGPU::sub1
)
1398 return MachineOperand::CreateImm(Op
.getImm() >> 32);
1400 llvm_unreachable("Unhandled register index for immediate");
1403 unsigned SubReg
= buildExtractSubReg(MII
, MRI
, Op
, SuperRC
,
1405 return MachineOperand::CreateReg(SubReg
, false);
unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
    .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
    .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
          .addReg(LoDst)
          .addImm(AMDGPU::sub0)
          .addReg(HiDst)
          .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}
// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
  assert(Inst->getNumExplicitOperands() == 3);
  MachineOperand Op1 = Inst->getOperand(1);
  Inst->RemoveOperand(1);
  Inst->addOperand(Op1);
}
bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
                                 const MachineOperand *MO) const {
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  const MCInstrDesc &InstDesc = get(MI->getOpcode());
  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
  const TargetRegisterClass *DefinedRC =
      OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
  if (!MO)
    MO = &MI->getOperand(OpIdx);

  if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) {
    unsigned SGPRUsed =
        MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (i == OpIdx)
        continue;
      if (usesConstantBus(MRI, MI->getOperand(i)) &&
          MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
        return false;
      }
    }
  }

  if (MO->isReg()) {
    assert(DefinedRC);
    const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());

    // In order to be legal, the common sub-class must be equal to the
    // class of the current operand. For example:
    //
    // v_mov_b32 s0 ; Operand defined as vsrc_32
    //              ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
    //
    // s_sendmsg 0, s0 ; Operand defined as m0reg
    //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
    return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
  }

  // Handle non-register types that are treated like immediates.
  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());

  if (!DefinedRC) {
    // This operand expects an immediate.
    return true;
  }

  return isImmOperandLegal(MI, OpIdx, *MO);
}
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    // Legalize src0
    if (!isOperandLegal(MI, Src0Idx))
      legalizeOpWithMove(MI, Src0Idx);

    // Legalize src1
    if (isOperandLegal(MI, Src1Idx))
      return;

    // Usually src0 of VOP2 instructions allows more types of inputs
    // than src1, so try to commute the instruction to decrease our
    // chances of having to insert a MOV instruction to legalize src1.
    if (MI->isCommutable()) {
      if (commuteInstruction(MI))
        // If we are successful in commuting, then we know MI is legal, so
        // we are done.
        return;
    }

    legalizeOpWithMove(MI, Src1Idx);
    return;
  }
  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };

    // Find the one SGPR operand we are allowed to use.
    unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);

    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        break;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }
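  // At most one SGPR may be read directly by a VOP3 instruction, so the loop
  // above keeps the first (or the pre-selected) SGPR operand and copies any
  // additional SGPR or otherwise illegal operand into a VGPR with
  // legalizeOpWithMove.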
  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
              MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be VGPRs,
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }
  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }
  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.
  int SRsrcIdx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (SRsrcIdx != -1) {
    // We have an MUBUF instruction
    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
                             RI.getRegClass(SRsrcRC))) {
      // The operands are legal.
      // FIXME: We may need to legalize operands besides srsrc.
      return;
    }

    MachineBasicBlock &MBB = *MI->getParent();
    // Extract the ptr from the resource descriptor.
    // SRsrcPtrLo = srsrc:sub0
    unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
        &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);

    // SRsrcPtrHi = srsrc:sub1
    unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
        &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);

    // Create an empty resource descriptor
    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    // Zero64 = 0
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
            Zero64)
            .addImm(0);

    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatLo)
            .addImm(RsrcDataFormat & 0xFFFFFFFF);

    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatHi)
            .addImm(RsrcDataFormat >> 32);

    // NewSRsrc = {Zero64, SRsrcFormat}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
            NewSRsrc)
            .addReg(Zero64)
            .addImm(AMDGPU::sub0_sub1)
            .addReg(SRsrcFormatLo)
            .addImm(AMDGPU::sub2)
            .addReg(SRsrcFormatHi)
            .addImm(AMDGPU::sub3);
    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    unsigned NewVAddrLo;
    unsigned NewVAddrHi;
    if (VAddr) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
      NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine);

      // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);
    } else {
      // This instruction is the _OFFSET variant, so we need to convert it to
      // ADDR64.
      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
             "with non-zero soffset is not implemented");

      // Create the new instruction.
      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
      MachineInstr *Addr64 =
          BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
                  .addOperand(*VData)
                  .addOperand(*SRsrc)
                  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                              // This will be replaced later
                                              // with the new value of vaddr.
                  .addOperand(*Offset);

      MI->removeFromParent();
      MI = Addr64;

      NewVAddrLo = SRsrcPtrLo;
      NewVAddrHi = SRsrcPtrHi;
      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    }
    // NewVaddr = {NewVaddrHi, NewVaddrLo}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
            NewVAddr)
            .addReg(NewVAddrLo)
            .addImm(AMDGPU::sub0)
            .addReg(NewVAddrHi)
            .addImm(AMDGPU::sub1);

    // Update the instruction to use NewVaddr
    VAddr->setReg(NewVAddr);
    // Update the instruction to use NewSRsrc
    SRsrc->setReg(NewSRsrc);
  }
}
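// To summarize the MUBUF legalization above: when srsrc is not a legal
// SGPR_128 value, the pointer is pulled out of the descriptor, a fresh
// descriptor {0, RSRC_DATA_FORMAT} is built in SGPRs, and the pointer is
// instead folded into the vaddr operand of the ADDR64 form of the
// instruction.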
1769 void SIInstrInfo::splitSMRD(MachineInstr
*MI
,
1770 const TargetRegisterClass
*HalfRC
,
1771 unsigned HalfImmOp
, unsigned HalfSGPROp
,
1772 MachineInstr
*&Lo
, MachineInstr
*&Hi
) const {
1774 DebugLoc DL
= MI
->getDebugLoc();
1775 MachineBasicBlock
*MBB
= MI
->getParent();
1776 MachineRegisterInfo
&MRI
= MBB
->getParent()->getRegInfo();
1777 unsigned RegLo
= MRI
.createVirtualRegister(HalfRC
);
1778 unsigned RegHi
= MRI
.createVirtualRegister(HalfRC
);
1779 unsigned HalfSize
= HalfRC
->getSize();
1780 const MachineOperand
*OffOp
=
1781 getNamedOperand(*MI
, AMDGPU::OpName::offset
);
1782 const MachineOperand
*SBase
= getNamedOperand(*MI
, AMDGPU::OpName::sbase
);
1784 // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
1787 bool isVI
= RI
.ST
.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS
;
1788 unsigned OffScale
= isVI
? 1 : 4;
1789 // Handle the _IMM variant
1790 unsigned LoOffset
= OffOp
->getImm() * OffScale
;
1791 unsigned HiOffset
= LoOffset
+ HalfSize
;
1792 Lo
= BuildMI(*MBB
, MI
, DL
, get(HalfImmOp
), RegLo
)
1794 .addImm(LoOffset
/ OffScale
);
1796 if (!isUInt
<20>(HiOffset
) || (!isVI
&& !isUInt
<8>(HiOffset
/ OffScale
))) {
1797 unsigned OffsetSGPR
=
1798 MRI
.createVirtualRegister(&AMDGPU::SReg_32RegClass
);
1799 BuildMI(*MBB
, MI
, DL
, get(AMDGPU::S_MOV_B32
), OffsetSGPR
)
1800 .addImm(HiOffset
); // The offset in register is in bytes.
1801 Hi
= BuildMI(*MBB
, MI
, DL
, get(HalfSGPROp
), RegHi
)
1803 .addReg(OffsetSGPR
);
1805 Hi
= BuildMI(*MBB
, MI
, DL
, get(HalfImmOp
), RegHi
)
1807 .addImm(HiOffset
/ OffScale
);
1810 // Handle the _SGPR variant
1811 MachineOperand
*SOff
= getNamedOperand(*MI
, AMDGPU::OpName::soff
);
1812 Lo
= BuildMI(*MBB
, MI
, DL
, get(HalfSGPROp
), RegLo
)
1815 unsigned OffsetSGPR
= MRI
.createVirtualRegister(&AMDGPU::SReg_32RegClass
);
1816 BuildMI(*MBB
, MI
, DL
, get(AMDGPU::S_ADD_I32
), OffsetSGPR
)
1819 Hi
= BuildMI(*MBB
, MI
, DL
, get(HalfSGPROp
))
1821 .addReg(OffsetSGPR
);
1824 unsigned SubLo
, SubHi
;
1827 SubLo
= AMDGPU::sub0
;
1828 SubHi
= AMDGPU::sub1
;
1831 SubLo
= AMDGPU::sub0_sub1
;
1832 SubHi
= AMDGPU::sub2_sub3
;
1835 SubLo
= AMDGPU::sub0_sub1_sub2_sub3
;
1836 SubHi
= AMDGPU::sub4_sub5_sub6_sub7
;
1839 SubLo
= AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7
;
1840 SubHi
= AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
;
1843 llvm_unreachable("Unhandled HalfSize");
1846 BuildMI(*MBB
, MI
, DL
, get(AMDGPU::REG_SEQUENCE
))
1847 .addOperand(MI
->getOperand(0))
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR:
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: {
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;

    if (MI->getOperand(2).isReg()) {
      RegOffset = MI->getOperand(2).getReg();
      ImmOffset = 0;
    } else {
      assert(MI->getOperand(2).isImm());
      // SMRD instructions take a dword offset on SI and a byte offset on VI,
      // and MUBUF instructions always take a byte offset.
      ImmOffset = MI->getOperand(2).getImm();
      if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
        ImmOffset <<= 2;
      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

      if (isUInt<12>(ImmOffset)) {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(0);
      } else {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(ImmOffset);
        ImmOffset = 0;
      }
    }

    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    unsigned DWord0 = RegOffset;
    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
            .addImm(0);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
            .addImm(RsrcDataFormat & 0xFFFFFFFF);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
            .addImm(RsrcDataFormat >> 32);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
            .addReg(DWord0)
            .addImm(AMDGPU::sub0)
            .addReg(DWord1)
            .addImm(AMDGPU::sub1)
            .addReg(DWord2)
            .addImm(AMDGPU::sub2)
            .addReg(DWord3)
            .addImm(AMDGPU::sub3);
    MI->setDesc(get(NewOpcode));
    if (MI->getOperand(2).isReg()) {
      MI->getOperand(2).setReg(MI->getOperand(1).getReg());
    } else {
      MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
    }
    MI->getOperand(1).setReg(SRsrc);
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));

    const TargetRegisterClass *NewDstRC =
        RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);

    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    break;
  }
  case AMDGPU::S_LOAD_DWORDX8_IMM:
  case AMDGPU::S_LOAD_DWORDX8_SGPR: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
              AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX16_SGPR: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
              AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }
  }
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      if (isSMRD(Inst->getOpcode())) {
        moveSMRDToVALU(Inst, MRI);
      }
      break;
    case AMDGPU::S_MOV_B64: {
      DebugLoc DL = Inst->getDebugLoc();

      // If the source operand is a register we can replace this with a
      // copy.
      if (Inst->getOperand(1).isReg()) {
        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
          .addOperand(Inst->getOperand(0))
          .addOperand(Inst->getOperand(1));
        Worklist.push_back(Copy);
      } else {
        // Otherwise, we need to split this into two movs, because there is
        // no 64-bit VALU move instruction.
        unsigned Reg = Inst->getOperand(0).getReg();
        unsigned Dst = split64BitImm(Worklist,
                                     Inst,
                                     MRI,
                                     MRI.getRegClass(Reg),
                                     Inst->getOperand(1));
        MRI.replaceRegWith(Reg, Dst);
      }
      Inst->eraseFromParent();
      continue;
    }
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_I64: {
      splitScalar64BitBFE(Worklist, Inst);
      Inst->eraseFromParent();
      continue;
    }

    case AMDGPU::S_LSHL_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it, and
    // we're just about to add the implicit use / defs of VCC, and we don't want
    // them.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    addDescImplicitUseDef(NewDesc, Inst);

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
      Inst->RemoveOperand(2); // Remove old immediate.
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
    }

    // Update the destination register class.

    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Opcode) {
      // For target instructions, getOpRegClass just returns the virtual
      // register class associated with the operand, so we need to find an
      // equivalent VGPR register class in order to move the instruction to the
      // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Legalize the operands
    legalizeOperands(Inst);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}
//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VGPR_32RegClass;
}
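
// splitScalar64BitUnaryOp lowers a 64-bit scalar operation with one source
// (e.g. S_NOT_B64) into two 32-bit operations on the sub0 and sub1 halves of
// the source, then reassembles the two 32-bit results into the original
// 64-bit destination with a REG_SEQUENCE. Both half instructions are pushed
// onto the worklist so they are converted to the VALU and legalized in turn.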
void SIInstrInfo::splitScalar64BitUnaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}
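
// Same lowering as splitScalar64BitUnaryOp, but for two-source instructions
// (S_AND_B64, S_OR_B64, S_XOR_B64): each 32-bit half of the result is
// computed from the corresponding sub0/sub1 halves of both source operands.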
void SIInstrInfo::splitScalar64BitBinaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}
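
// A 64-bit population count (S_BCNT1_I32_B64) is lowered as two 32-bit
// V_BCNT_U32_B32 instructions: the first counts the low half starting from 0,
// and the second counts the high half while adding in the first result (the
// VALU bcnt's second operand is an accumulator), so the final value is
// bcnt(hi) + bcnt(lo).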
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                                       MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  Worklist.push_back(First);
  Worklist.push_back(Second);
}
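
// Lower S_BFE_I64 used as a 64-bit sign_extend_inreg. For widths below 32 the
// low dword is produced by a 32-bit V_BFE_I32 and the high dword is filled
// with the sign bit (arithmetic shift right by 31); for a width of exactly 32
// the low dword is simply the low dword of the source and the high dword is
// its sign bits.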
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                                      MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  uint32_t Imm = Inst->getOperand(2).getImm();
  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

  (void) Offset;

  // Only sext_inreg cases handled.
  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
         BitWidth <= 32 &&
         Offset == 0 &&
         "Not implemented");

  if (BitWidth < 32) {
    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
      .addImm(0)
      .addImm(BitWidth);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
      .addImm(31)
      .addReg(MidRegLo);

    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
      .addReg(MidRegLo)
      .addImm(AMDGPU::sub0)
      .addReg(MidRegHi)
      .addImm(AMDGPU::sub1);

    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    return;
  }

  MachineOperand &Src = Inst->getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
    .addReg(Src.getReg(), 0, AMDGPU::sub0);

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
    .addImm(AMDGPU::sub0)
    .addReg(TmpReg)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
}
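
// Copy the implicit uses and defs listed in the new VALU opcode's MCInstrDesc
// onto the instruction itself (e.g. an implicit VCC def), since changing the
// descriptor with setDesc() alone does not add them.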
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
                                        MachineInstr *Inst) const {
  // Add the implicit and explicit register definitions.
  if (NewDesc.ImplicitUses) {
    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitUses[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
    }
  }

  if (NewDesc.ImplicitDefs) {
    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitDefs[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }
}
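
// VALU instructions may read at most one SGPR (the constant bus limit), so
// given up to three operand indices this returns the single SGPR that the
// instruction is, or should be, allowed to keep: a required SGPR (VCC,
// FLAT_SCR, or an operand whose class demands an SGPR) wins outright;
// otherwise the SGPR that appears in the most operands is chosen.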
unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
                                   int OpIndices[3]) const {
  const MCInstrDesc &Desc = get(MI->getOpcode());

  // Find the one SGPR operand we are allowed to use.
  unsigned SGPRReg = AMDGPU::NoRegister;

  // First we need to consider the instruction's operand requirements before
  // legalizing. Some operands are required to be SGPRs, such as implicit uses
  // of VCC, but we are still bound by the constant bus requirement to only use
  // one.

  // If the operand's class is an SGPR, we can never move it.

  for (const MachineOperand &MO : MI->implicit_operands()) {
    // We only care about reads.
    if (MO.isDef())
      continue;

    if (MO.getReg() == AMDGPU::VCC)
      return AMDGPU::VCC;

    if (MO.getReg() == AMDGPU::FLAT_SCR)
      return AMDGPU::FLAT_SCR;
  }

  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    if (Idx == -1)
      break;

    const MachineOperand &MO = MI->getOperand(Idx);
    if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
      SGPRReg = MO.getReg();

    if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
      UsedSGPRs[i] = MO.getReg();
  }

  if (SGPRReg != AMDGPU::NoRegister)
    return SGPRReg;

  // We don't have a required SGPR operand, so we have a bit more freedom in
  // selecting operands to move.

  // Try to select the most used SGPR. If an SGPR is equal to one of the
  // others, we choose that.
  //
  // e.g.
  // V_FMA_F32 v0, s0, s0, s0 -> No moves
  // V_FMA_F32 v0, s0, s1, s0 -> Move s1

  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[0];
  }

  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }

  return SGPRReg;
}
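
// Indirect writes are emitted as the SI_INDIRECT_DST_V1 pseudo, which defines
// and reads a base VGPR chosen from the function's indirect-addressing range
// and takes the offset register plus the value to store; the pseudo is
// expanded later, outside this file, into a v_movrel-based sequence.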
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);
}
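
// Reserve every register, in each VGPR register class, that overlaps the VGPR
// index range [Begin, End] used for indirect addressing. For the wider
// classes the start index is stepped back by (registers in the tuple - 1) so
// that any tuple overlapping the range is reserved as well.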
void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                             unsigned OperandName) const {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  if (Idx == -1)
    return nullptr;

  return &MI.getOperand(Idx);
}
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
  if (ST.isAmdHsaOS())
    RsrcDataFormat |= (1ULL << 56);

  return RsrcDataFormat;
}