//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();
  // Set condition code actions
  setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE,  MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT,  MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
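
  // Marking a condition code Expand asks the legalizer to rewrite an
  // unsupported comparison in terms of a supported one, typically by
  // swapping the operands and/or inverting the predicate (e.g. SETLT a, b
  // can become SETGT b, a), so no libcall is ever needed for these.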
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setOperationAction(ISD::SUB, MVT::i64, Expand);

  // These should be replaced by UDIVREM, but it does not happen automatically
  // during Type Legalization
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }
  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;

  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }
  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }
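
  // Note that EOP is derived by peeking at std::next(I): the bit is set only
  // when the write is immediately followed by the RETURN pseudo, letting the
  // last store double as the end-of-program marker.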
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 8: // ShadowRect
      CTX = CTY = CTW = 0;
      break;
    case 11: // Shadow1DArray
      CTZ = CTW = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = CTW = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 8: // ShadowRect
      CTX = CTY = CTW = 0;
      break;
    case 11: // Shadow1DArray
      CTZ = CTW = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = CTW = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }
  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
                                SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
            MF.getSubtarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
                         SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::AMDGPU_read_workdim:
      return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);

    case Intrinsic::AMDGPU_rsq:
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::UDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM);
    break;
  }
  case ISD::UREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(UDIVREM.getValue(1));
    break;
  }
  case ISD::SDIV: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM);
    break;
  }
  case ISD::SREM: {
    SDValue Op = SDValue(N, 0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
        N->getOperand(0), N->getOperand(1));
    Results.push_back(SDIVREM.getValue(1));
    break;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
                               Vector, DAG.getConstant(i, getVectorIdxTy())));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}
SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
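
// A worked example of the range reduction above: for x = 2*Pi,
//   x * 0.15915494309 (= 1/(2*Pi)) = 1.0
//   FRACT(1.0 + 0.5) = 0.5, and 0.5 - 0.5 = 0.0,
// i.e. the argument is reduced modulo a full turn into [-0.5, 0.5), which
// sits inside the [-1, 1] window the R700+ trig units accept; on R600 the
// result is rescaled by Pi to land in the [-Pi, Pi] window instead.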
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One  = DAG.getConstant(1, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}
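
// Concretely, for Shift == 0: CompShift == 31, so Overflow == (Lo >> 31) >> 1
// == 0, as no bits of Lo may leak into Hi; a single Lo >> (32 - Shift) would
// instead be a shift by 32, which the 32-bit hardware shifter cannot express.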
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, VT);
  SDValue One  = DAG.getConstant(1, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}
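
// Same trick as LowerSHLParts, mirrored: the bits that move from Hi down into
// Lo are built with (Hi << CompShift) << 1 so that Shift == 0 yields zero
// overflow rather than an undefined Hi << 32.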
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
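
// The dword offsets used with this helper (see the r600_read_* cases in
// LowerOperation) are 0-2 for ngroups, 3-5 for the global size and 6-8 for
// the local size; those nine dwords are the 36-byte header that
// LowerFormalArguments skips when loading kernel arguments.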
bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
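
// Given the SRLPad values above, a byte address of 16 maps to register index
// 4 when StackWidth == 1 (16 >> 2, one dword per slot) but to index 1 when
// StackWidth == 4 (16 >> 4, one full 128-bit register per slot).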
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  case 2:
    Channel = ElemIdx % 2;
    break;
  }
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
// return (512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
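
// 4096 * n here is just (kc_bank << 12) from the formula above, e.g.
// CONSTANT_BUFFER_2 yields 512 + (2 << 12) = 512 + 4096 * 2 = 8704.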
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, DL);
  }

  // Lower constant address space loads of global variables.
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(
          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {

    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
        DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      getTargetMachine().getSubtargetImpl()->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // with less than 64-bit arguments.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.
      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
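
// Offset bookkeeping: every explicit argument is fetched from constant
// buffer 0 at 36 + LocMemOffset (past the implicit header), and after the
// loop ABIArgOffset points one byte past the last argument, which is the
// position AMDGPU_read_workdim divides by 4 in LowerOperation.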
EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector())
     return MVT::i32;
   return VT.changeVectorElementTypeToInteger();
}
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}
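
// For example, build_vector (x, 1.0f, 0.0f, x) becomes
// build_vector (x, undef, undef, undef) with RemapSwizzle = {1 -> 5 (SEL_1),
// 2 -> 4 (SEL_0), 3 -> 0}, freeing three of the four register channels.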
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}
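
// The two phases share the Swz[] rewrite: CompactSwizzlableVector first
// redirects lanes to the free selects (SEL_0/SEL_1/SEL_MASK_WRITE) or to a
// duplicate lane, then ReorganizeVector moves extracts into their natural
// lanes; after each phase the caller-visible swizzle operands are patched
// to match the new vector layout.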

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC
  }
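
  // (The SET*_DX10 variants of the compare instructions return an all-ones
  // integer (-1) on true and 0 on false, which is what lets the constant
  // select values above fold directly into the compare itself.)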

  // insert_vector_elt (build_vector elt0, ..., eltN), NewEltIdx, idx
  // => build_vector elt0, ..., NewEltIdx, ..., eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // extract_vector_elt (build_vector) nodes generated by custom lowering
  // also need to be combined here.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    if (Ret.getNode())
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }
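
  // Why seteq takes the inverse: the inner selectcc yields a when cc holds
  // and b otherwise, so comparing its result against b with seteq succeeds
  // exactly when cc failed; hence selectcc x, y, a, b, inv(cc). With setne
  // the outer compare succeeds exactly when cc held, so the inner node can
  // be returned as-is.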

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
  }

  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18)
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs);
  }
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
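
// Try to fold one operand of an already-selected machine node (a sketch of
// the contract, inferred from the call sites in PostISelFolding below):
// FNEG_R600/FABS_R600 become the neg/abs input modifiers, CONST_COPY becomes
// a direct ALU_CONST read, and MOV_IMM_* becomes an inline constant register
// or the ALU_LITERAL_X slot. Src is rewritten in place, the matching
// Neg/Abs/Sel/Imm operands are updated, and the return value reports whether
// a fold was applied.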
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
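  // The Consts walk in the case above exists because an R600 ALU group can
  // only read a limited set of constant-buffer entries at once; before
  // folding one more ALU_CONST read we collect every constant the parent
  // already uses and let fitsConstReadLimitations() decide whether the
  // extra read still fits.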
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = cast<ConstantSDNode>(Imm);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
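
// A note on the immediate folding above (hardware background, summarized):
// R600 exposes a few frequently used constants (0.0, 0.5, 1.0 and integer
// 0/1) as dedicated source registers (ZERO, HALF, ONE, ONE_INT) that cost
// nothing to encode. Everything else must travel through the single
// ALU_LITERAL_X slot, which is why at most one non-inline immediate can be
// folded per instruction.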

/// \brief Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  std::vector<SDValue> Ops;
  for (const SDUse &I : Node->ops())
    Ops.push_back(I);

  if (Opcode == AMDGPU::DOT_4) {
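    // Index arithmetic below: getOperandIdx() counts operands on the
    // MachineInstr form, where the destination (when present) is operand 0,
    // while on the selected SDNode the destination is a result rather than
    // an operand. Hence indices into Ops[] are shifted down by one, and
    // SelIdx likewise when the instruction defines a dst.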
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // Operand 0 selects the register class; the remaining operands come in
    // (value, subregister index) pairs, so step by two and fold the values.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // CLAMP_R600 is a pseudo; fold it into the clamp output modifier of the
    // instruction that defines its operand, when that instruction has one.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
                                      AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for (unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
                              Node->getVTList(), Ops);
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}