]>
git.proxmox.com Git - rustc.git/blob - src/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
1 //===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Subclass of MipsTargetLowering specialized for mips32/64.
12 //===----------------------------------------------------------------------===//
13 #include "MipsSEISelLowering.h"
14 #include "MipsMachineFunction.h"
15 #include "MipsRegisterInfo.h"
16 #include "MipsTargetMachine.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetInstrInfo.h"
27 #define DEBUG_TYPE "mips-isel"
30 EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden
,
31 cl::desc("MIPS: Enable tail calls."), cl::init(false));
33 static cl::opt
<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
34 cl::desc("Expand double precision loads and "
35 "stores to their single precision "
38 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine
&TM
,
39 const MipsSubtarget
&STI
)
40 : MipsTargetLowering(TM
, STI
) {
41 // Set up the register classes
42 addRegisterClass(MVT::i32
, &Mips::GPR32RegClass
);
44 if (Subtarget
.isGP64bit())
45 addRegisterClass(MVT::i64
, &Mips::GPR64RegClass
);
47 if (Subtarget
.hasDSP() || Subtarget
.hasMSA()) {
48 // Expand all truncating stores and extending loads.
49 for (MVT VT0
: MVT::vector_valuetypes()) {
50 for (MVT VT1
: MVT::vector_valuetypes()) {
51 setTruncStoreAction(VT0
, VT1
, Expand
);
52 setLoadExtAction(ISD::SEXTLOAD
, VT0
, VT1
, Expand
);
53 setLoadExtAction(ISD::ZEXTLOAD
, VT0
, VT1
, Expand
);
54 setLoadExtAction(ISD::EXTLOAD
, VT0
, VT1
, Expand
);
59 if (Subtarget
.hasDSP()) {
60 MVT::SimpleValueType VecTys
[2] = {MVT::v2i16
, MVT::v4i8
};
62 for (unsigned i
= 0; i
< array_lengthof(VecTys
); ++i
) {
63 addRegisterClass(VecTys
[i
], &Mips::DSPRRegClass
);
65 // Expand all builtin opcodes.
66 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
67 setOperationAction(Opc
, VecTys
[i
], Expand
);
69 setOperationAction(ISD::ADD
, VecTys
[i
], Legal
);
70 setOperationAction(ISD::SUB
, VecTys
[i
], Legal
);
71 setOperationAction(ISD::LOAD
, VecTys
[i
], Legal
);
72 setOperationAction(ISD::STORE
, VecTys
[i
], Legal
);
73 setOperationAction(ISD::BITCAST
, VecTys
[i
], Legal
);
76 setTargetDAGCombine(ISD::SHL
);
77 setTargetDAGCombine(ISD::SRA
);
78 setTargetDAGCombine(ISD::SRL
);
79 setTargetDAGCombine(ISD::SETCC
);
80 setTargetDAGCombine(ISD::VSELECT
);
83 if (Subtarget
.hasDSPR2())
84 setOperationAction(ISD::MUL
, MVT::v2i16
, Legal
);
86 if (Subtarget
.hasMSA()) {
87 addMSAIntType(MVT::v16i8
, &Mips::MSA128BRegClass
);
88 addMSAIntType(MVT::v8i16
, &Mips::MSA128HRegClass
);
89 addMSAIntType(MVT::v4i32
, &Mips::MSA128WRegClass
);
90 addMSAIntType(MVT::v2i64
, &Mips::MSA128DRegClass
);
91 addMSAFloatType(MVT::v8f16
, &Mips::MSA128HRegClass
);
92 addMSAFloatType(MVT::v4f32
, &Mips::MSA128WRegClass
);
93 addMSAFloatType(MVT::v2f64
, &Mips::MSA128DRegClass
);
95 setTargetDAGCombine(ISD::AND
);
96 setTargetDAGCombine(ISD::OR
);
97 setTargetDAGCombine(ISD::SRA
);
98 setTargetDAGCombine(ISD::VSELECT
);
99 setTargetDAGCombine(ISD::XOR
);
102 if (!Subtarget
.abiUsesSoftFloat()) {
103 addRegisterClass(MVT::f32
, &Mips::FGR32RegClass
);
105 // When dealing with single precision only, use libcalls
106 if (!Subtarget
.isSingleFloat()) {
107 if (Subtarget
.isFP64bit())
108 addRegisterClass(MVT::f64
, &Mips::FGR64RegClass
);
110 addRegisterClass(MVT::f64
, &Mips::AFGR64RegClass
);
114 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Custom
);
115 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Custom
);
116 setOperationAction(ISD::MULHS
, MVT::i32
, Custom
);
117 setOperationAction(ISD::MULHU
, MVT::i32
, Custom
);
119 if (Subtarget
.hasCnMips())
120 setOperationAction(ISD::MUL
, MVT::i64
, Legal
);
121 else if (Subtarget
.isGP64bit())
122 setOperationAction(ISD::MUL
, MVT::i64
, Custom
);
124 if (Subtarget
.isGP64bit()) {
125 setOperationAction(ISD::MULHS
, MVT::i64
, Custom
);
126 setOperationAction(ISD::MULHU
, MVT::i64
, Custom
);
127 setOperationAction(ISD::SDIVREM
, MVT::i64
, Custom
);
128 setOperationAction(ISD::UDIVREM
, MVT::i64
, Custom
);
131 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::i64
, Custom
);
132 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::i64
, Custom
);
134 setOperationAction(ISD::SDIVREM
, MVT::i32
, Custom
);
135 setOperationAction(ISD::UDIVREM
, MVT::i32
, Custom
);
136 setOperationAction(ISD::ATOMIC_FENCE
, MVT::Other
, Custom
);
137 setOperationAction(ISD::LOAD
, MVT::i32
, Custom
);
138 setOperationAction(ISD::STORE
, MVT::i32
, Custom
);
140 setTargetDAGCombine(ISD::ADDE
);
141 setTargetDAGCombine(ISD::SUBE
);
142 setTargetDAGCombine(ISD::MUL
);
144 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
145 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::Other
, Custom
);
146 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
149 setOperationAction(ISD::LOAD
, MVT::f64
, Custom
);
150 setOperationAction(ISD::STORE
, MVT::f64
, Custom
);
153 if (Subtarget
.hasMips32r6()) {
154 // MIPS32r6 replaces the accumulator-based multiplies with a three register
156 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Expand
);
157 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Expand
);
158 setOperationAction(ISD::MUL
, MVT::i32
, Legal
);
159 setOperationAction(ISD::MULHS
, MVT::i32
, Legal
);
160 setOperationAction(ISD::MULHU
, MVT::i32
, Legal
);
162 // MIPS32r6 replaces the accumulator-based division/remainder with separate
163 // three register division and remainder instructions.
164 setOperationAction(ISD::SDIVREM
, MVT::i32
, Expand
);
165 setOperationAction(ISD::UDIVREM
, MVT::i32
, Expand
);
166 setOperationAction(ISD::SDIV
, MVT::i32
, Legal
);
167 setOperationAction(ISD::UDIV
, MVT::i32
, Legal
);
168 setOperationAction(ISD::SREM
, MVT::i32
, Legal
);
169 setOperationAction(ISD::UREM
, MVT::i32
, Legal
);
171 // MIPS32r6 replaces conditional moves with an equivalent that removes the
172 // need for three GPR read ports.
173 setOperationAction(ISD::SETCC
, MVT::i32
, Legal
);
174 setOperationAction(ISD::SELECT
, MVT::i32
, Legal
);
175 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Expand
);
177 setOperationAction(ISD::SETCC
, MVT::f32
, Legal
);
178 setOperationAction(ISD::SELECT
, MVT::f32
, Legal
);
179 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Expand
);
181 assert(Subtarget
.isFP64bit() && "FR=1 is required for MIPS32r6");
182 setOperationAction(ISD::SETCC
, MVT::f64
, Legal
);
183 setOperationAction(ISD::SELECT
, MVT::f64
, Legal
);
184 setOperationAction(ISD::SELECT_CC
, MVT::f64
, Expand
);
186 setOperationAction(ISD::BRCOND
, MVT::Other
, Legal
);
188 // Floating point > and >= are supported via < and <=
189 setCondCodeAction(ISD::SETOGE
, MVT::f32
, Expand
);
190 setCondCodeAction(ISD::SETOGT
, MVT::f32
, Expand
);
191 setCondCodeAction(ISD::SETUGE
, MVT::f32
, Expand
);
192 setCondCodeAction(ISD::SETUGT
, MVT::f32
, Expand
);
194 setCondCodeAction(ISD::SETOGE
, MVT::f64
, Expand
);
195 setCondCodeAction(ISD::SETOGT
, MVT::f64
, Expand
);
196 setCondCodeAction(ISD::SETUGE
, MVT::f64
, Expand
);
197 setCondCodeAction(ISD::SETUGT
, MVT::f64
, Expand
);
200 if (Subtarget
.hasMips64r6()) {
201 // MIPS64r6 replaces the accumulator-based multiplies with a three register
203 setOperationAction(ISD::MUL
, MVT::i64
, Legal
);
204 setOperationAction(ISD::MULHS
, MVT::i64
, Legal
);
205 setOperationAction(ISD::MULHU
, MVT::i64
, Legal
);
207 // MIPS64r6 replaces the accumulator-based division/remainder with separate
208 // three register division and remainder instructions.
209 setOperationAction(ISD::SDIVREM
, MVT::i64
, Expand
);
210 setOperationAction(ISD::UDIVREM
, MVT::i64
, Expand
);
211 setOperationAction(ISD::SDIV
, MVT::i64
, Legal
);
212 setOperationAction(ISD::UDIV
, MVT::i64
, Legal
);
213 setOperationAction(ISD::SREM
, MVT::i64
, Legal
);
214 setOperationAction(ISD::UREM
, MVT::i64
, Legal
);
216 // MIPS64r6 replaces conditional moves with an equivalent that removes the
217 // need for three GPR read ports.
218 setOperationAction(ISD::SETCC
, MVT::i64
, Legal
);
219 setOperationAction(ISD::SELECT
, MVT::i64
, Legal
);
220 setOperationAction(ISD::SELECT_CC
, MVT::i64
, Expand
);
223 computeRegisterProperties();
226 const MipsTargetLowering
*
227 llvm::createMipsSETargetLowering(const MipsTargetMachine
&TM
,
228 const MipsSubtarget
&STI
) {
229 return new MipsSETargetLowering(TM
, STI
);
232 const TargetRegisterClass
*
233 MipsSETargetLowering::getRepRegClassFor(MVT VT
) const {
234 if (VT
== MVT::Untyped
)
235 return Subtarget
.hasDSP() ? &Mips::ACC64DSPRegClass
: &Mips::ACC64RegClass
;
237 return TargetLowering::getRepRegClassFor(VT
);
240 // Enable MSA support for the given integer type and Register class.
241 void MipsSETargetLowering::
242 addMSAIntType(MVT::SimpleValueType Ty
, const TargetRegisterClass
*RC
) {
243 addRegisterClass(Ty
, RC
);
245 // Expand all builtin opcodes.
246 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
247 setOperationAction(Opc
, Ty
, Expand
);
249 setOperationAction(ISD::BITCAST
, Ty
, Legal
);
250 setOperationAction(ISD::LOAD
, Ty
, Legal
);
251 setOperationAction(ISD::STORE
, Ty
, Legal
);
252 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, Ty
, Custom
);
253 setOperationAction(ISD::INSERT_VECTOR_ELT
, Ty
, Legal
);
254 setOperationAction(ISD::BUILD_VECTOR
, Ty
, Custom
);
256 setOperationAction(ISD::ADD
, Ty
, Legal
);
257 setOperationAction(ISD::AND
, Ty
, Legal
);
258 setOperationAction(ISD::CTLZ
, Ty
, Legal
);
259 setOperationAction(ISD::CTPOP
, Ty
, Legal
);
260 setOperationAction(ISD::MUL
, Ty
, Legal
);
261 setOperationAction(ISD::OR
, Ty
, Legal
);
262 setOperationAction(ISD::SDIV
, Ty
, Legal
);
263 setOperationAction(ISD::SREM
, Ty
, Legal
);
264 setOperationAction(ISD::SHL
, Ty
, Legal
);
265 setOperationAction(ISD::SRA
, Ty
, Legal
);
266 setOperationAction(ISD::SRL
, Ty
, Legal
);
267 setOperationAction(ISD::SUB
, Ty
, Legal
);
268 setOperationAction(ISD::UDIV
, Ty
, Legal
);
269 setOperationAction(ISD::UREM
, Ty
, Legal
);
270 setOperationAction(ISD::VECTOR_SHUFFLE
, Ty
, Custom
);
271 setOperationAction(ISD::VSELECT
, Ty
, Legal
);
272 setOperationAction(ISD::XOR
, Ty
, Legal
);
274 if (Ty
== MVT::v4i32
|| Ty
== MVT::v2i64
) {
275 setOperationAction(ISD::FP_TO_SINT
, Ty
, Legal
);
276 setOperationAction(ISD::FP_TO_UINT
, Ty
, Legal
);
277 setOperationAction(ISD::SINT_TO_FP
, Ty
, Legal
);
278 setOperationAction(ISD::UINT_TO_FP
, Ty
, Legal
);
281 setOperationAction(ISD::SETCC
, Ty
, Legal
);
282 setCondCodeAction(ISD::SETNE
, Ty
, Expand
);
283 setCondCodeAction(ISD::SETGE
, Ty
, Expand
);
284 setCondCodeAction(ISD::SETGT
, Ty
, Expand
);
285 setCondCodeAction(ISD::SETUGE
, Ty
, Expand
);
286 setCondCodeAction(ISD::SETUGT
, Ty
, Expand
);
289 // Enable MSA support for the given floating-point type and Register class.
290 void MipsSETargetLowering::
291 addMSAFloatType(MVT::SimpleValueType Ty
, const TargetRegisterClass
*RC
) {
292 addRegisterClass(Ty
, RC
);
294 // Expand all builtin opcodes.
295 for (unsigned Opc
= 0; Opc
< ISD::BUILTIN_OP_END
; ++Opc
)
296 setOperationAction(Opc
, Ty
, Expand
);
298 setOperationAction(ISD::LOAD
, Ty
, Legal
);
299 setOperationAction(ISD::STORE
, Ty
, Legal
);
300 setOperationAction(ISD::BITCAST
, Ty
, Legal
);
301 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, Ty
, Legal
);
302 setOperationAction(ISD::INSERT_VECTOR_ELT
, Ty
, Legal
);
303 setOperationAction(ISD::BUILD_VECTOR
, Ty
, Custom
);
305 if (Ty
!= MVT::v8f16
) {
306 setOperationAction(ISD::FABS
, Ty
, Legal
);
307 setOperationAction(ISD::FADD
, Ty
, Legal
);
308 setOperationAction(ISD::FDIV
, Ty
, Legal
);
309 setOperationAction(ISD::FEXP2
, Ty
, Legal
);
310 setOperationAction(ISD::FLOG2
, Ty
, Legal
);
311 setOperationAction(ISD::FMA
, Ty
, Legal
);
312 setOperationAction(ISD::FMUL
, Ty
, Legal
);
313 setOperationAction(ISD::FRINT
, Ty
, Legal
);
314 setOperationAction(ISD::FSQRT
, Ty
, Legal
);
315 setOperationAction(ISD::FSUB
, Ty
, Legal
);
316 setOperationAction(ISD::VSELECT
, Ty
, Legal
);
318 setOperationAction(ISD::SETCC
, Ty
, Legal
);
319 setCondCodeAction(ISD::SETOGE
, Ty
, Expand
);
320 setCondCodeAction(ISD::SETOGT
, Ty
, Expand
);
321 setCondCodeAction(ISD::SETUGE
, Ty
, Expand
);
322 setCondCodeAction(ISD::SETUGT
, Ty
, Expand
);
323 setCondCodeAction(ISD::SETGE
, Ty
, Expand
);
324 setCondCodeAction(ISD::SETGT
, Ty
, Expand
);
329 MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT
,
333 MVT::SimpleValueType SVT
= VT
.getSimpleVT().SimpleTy
;
335 if (Subtarget
.systemSupportsUnalignedAccess()) {
336 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
337 // implementation defined whether this is handled by hardware, software, or
338 // a hybrid of the two but it's expected that most implementations will
339 // handle the majority of cases in hardware.
356 SDValue
MipsSETargetLowering::LowerOperation(SDValue Op
,
357 SelectionDAG
&DAG
) const {
358 switch(Op
.getOpcode()) {
359 case ISD::LOAD
: return lowerLOAD(Op
, DAG
);
360 case ISD::STORE
: return lowerSTORE(Op
, DAG
);
361 case ISD::SMUL_LOHI
: return lowerMulDiv(Op
, MipsISD::Mult
, true, true, DAG
);
362 case ISD::UMUL_LOHI
: return lowerMulDiv(Op
, MipsISD::Multu
, true, true, DAG
);
363 case ISD::MULHS
: return lowerMulDiv(Op
, MipsISD::Mult
, false, true, DAG
);
364 case ISD::MULHU
: return lowerMulDiv(Op
, MipsISD::Multu
, false, true, DAG
);
365 case ISD::MUL
: return lowerMulDiv(Op
, MipsISD::Mult
, true, false, DAG
);
366 case ISD::SDIVREM
: return lowerMulDiv(Op
, MipsISD::DivRem
, true, true, DAG
);
367 case ISD::UDIVREM
: return lowerMulDiv(Op
, MipsISD::DivRemU
, true, true,
369 case ISD::INTRINSIC_WO_CHAIN
: return lowerINTRINSIC_WO_CHAIN(Op
, DAG
);
370 case ISD::INTRINSIC_W_CHAIN
: return lowerINTRINSIC_W_CHAIN(Op
, DAG
);
371 case ISD::INTRINSIC_VOID
: return lowerINTRINSIC_VOID(Op
, DAG
);
372 case ISD::EXTRACT_VECTOR_ELT
: return lowerEXTRACT_VECTOR_ELT(Op
, DAG
);
373 case ISD::BUILD_VECTOR
: return lowerBUILD_VECTOR(Op
, DAG
);
374 case ISD::VECTOR_SHUFFLE
: return lowerVECTOR_SHUFFLE(Op
, DAG
);
377 return MipsTargetLowering::LowerOperation(Op
, DAG
);
381 // Transforms a subgraph in CurDAG if the following pattern is found:
382 // (addc multLo, Lo0), (adde multHi, Hi0),
384 // multHi/Lo: product of multiplication
385 // Lo0: initial value of Lo register
386 // Hi0: initial value of Hi register
387 // Return true if pattern matching was successful.
388 static bool selectMADD(SDNode
*ADDENode
, SelectionDAG
*CurDAG
) {
389 // ADDENode's third operand (operand 2) must be a flag output of an ADDC node in order
390 // for the matching to be successful.
391 SDNode
*ADDCNode
= ADDENode
->getOperand(2).getNode();
393 if (ADDCNode
->getOpcode() != ISD::ADDC
)
396 SDValue MultHi
= ADDENode
->getOperand(0);
397 SDValue MultLo
= ADDCNode
->getOperand(0);
398 SDNode
*MultNode
= MultHi
.getNode();
399 unsigned MultOpc
= MultHi
.getOpcode();
401 // MultHi and MultLo must be generated by the same node,
402 if (MultLo
.getNode() != MultNode
)
405 // and it must be a multiplication.
406 if (MultOpc
!= ISD::SMUL_LOHI
&& MultOpc
!= ISD::UMUL_LOHI
)
409 // MultLo and MultHi must be the first and second output of MultNode
411 if (MultHi
.getResNo() != 1 || MultLo
.getResNo() != 0)
414 // Transform this to a MADD only if ADDENode and ADDCNode are the only users
415 // of the values of MultNode, in which case MultNode will be removed in later
417 // If there exist users other than ADDENode or ADDCNode, this function returns
418 // here, which will result in MultNode being mapped to a single MULT
419 // instruction node rather than a pair of MULT and MADD instructions being
421 if (!MultHi
.hasOneUse() || !MultLo
.hasOneUse())
426 // Initialize accumulator.
427 SDValue ACCIn
= CurDAG
->getNode(MipsISD::MTLOHI
, DL
, MVT::Untyped
,
428 ADDCNode
->getOperand(1),
429 ADDENode
->getOperand(1));
431 // create MipsMAdd(u) node
432 MultOpc
= MultOpc
== ISD::UMUL_LOHI
? MipsISD::MAddu
: MipsISD::MAdd
;
434 SDValue MAdd
= CurDAG
->getNode(MultOpc
, DL
, MVT::Untyped
,
435 MultNode
->getOperand(0),// Factor 0
436 MultNode
->getOperand(1),// Factor 1
439 // replace uses of adde and addc here
440 if (!SDValue(ADDCNode
, 0).use_empty()) {
441 SDValue LoOut
= CurDAG
->getNode(MipsISD::MFLO
, DL
, MVT::i32
, MAdd
);
442 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(ADDCNode
, 0), LoOut
);
444 if (!SDValue(ADDENode
, 0).use_empty()) {
445 SDValue HiOut
= CurDAG
->getNode(MipsISD::MFHI
, DL
, MVT::i32
, MAdd
);
446 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(ADDENode
, 0), HiOut
);
453 // Transforms a subgraph in CurDAG if the following pattern is found:
454 // (subc Lo0, multLo), (sube Hi0, multHi),
456 // multHi/Lo: product of multiplication
457 // Lo0: initial value of Lo register
458 // Hi0: initial value of Hi register
459 // Return true if pattern matching was successful.
460 static bool selectMSUB(SDNode
*SUBENode
, SelectionDAG
*CurDAG
) {
461 // SUBENode's third operand (operand 2) must be a flag output of an SUBC node in order
462 // for the matching to be successful.
463 SDNode
*SUBCNode
= SUBENode
->getOperand(2).getNode();
465 if (SUBCNode
->getOpcode() != ISD::SUBC
)
468 SDValue MultHi
= SUBENode
->getOperand(1);
469 SDValue MultLo
= SUBCNode
->getOperand(1);
470 SDNode
*MultNode
= MultHi
.getNode();
471 unsigned MultOpc
= MultHi
.getOpcode();
473 // MultHi and MultLo must be generated by the same node,
474 if (MultLo
.getNode() != MultNode
)
477 // and it must be a multiplication.
478 if (MultOpc
!= ISD::SMUL_LOHI
&& MultOpc
!= ISD::UMUL_LOHI
)
481 // MultLo and MultHi must be the first and second output of MultNode
483 if (MultHi
.getResNo() != 1 || MultLo
.getResNo() != 0)
486 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
487 // of the values of MultNode, in which case MultNode will be removed in later
489 // If there exist users other than SUBENode or SUBCNode, this function returns
490 // here, which will result in MultNode being mapped to a single MULT
491 // instruction node rather than a pair of MULT and MSUB instructions being
493 if (!MultHi
.hasOneUse() || !MultLo
.hasOneUse())
498 // Initialize accumulator.
499 SDValue ACCIn
= CurDAG
->getNode(MipsISD::MTLOHI
, DL
, MVT::Untyped
,
500 SUBCNode
->getOperand(0),
501 SUBENode
->getOperand(0));
503 // create MipsMSub(u) node
504 MultOpc
= MultOpc
== ISD::UMUL_LOHI
? MipsISD::MSubu
: MipsISD::MSub
;
506 SDValue MSub
= CurDAG
->getNode(MultOpc
, DL
, MVT::Glue
,
507 MultNode
->getOperand(0),// Factor 0
508 MultNode
->getOperand(1),// Factor 1
511 // replace uses of sube and subc here
512 if (!SDValue(SUBCNode
, 0).use_empty()) {
513 SDValue LoOut
= CurDAG
->getNode(MipsISD::MFLO
, DL
, MVT::i32
, MSub
);
514 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(SUBCNode
, 0), LoOut
);
516 if (!SDValue(SUBENode
, 0).use_empty()) {
517 SDValue HiOut
= CurDAG
->getNode(MipsISD::MFHI
, DL
, MVT::i32
, MSub
);
518 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(SUBENode
, 0), HiOut
);
524 static SDValue
performADDECombine(SDNode
*N
, SelectionDAG
&DAG
,
525 TargetLowering::DAGCombinerInfo
&DCI
,
526 const MipsSubtarget
&Subtarget
) {
527 if (DCI
.isBeforeLegalize())
530 if (Subtarget
.hasMips32() && !Subtarget
.hasMips32r6() &&
531 N
->getValueType(0) == MVT::i32
&& selectMADD(N
, &DAG
))
532 return SDValue(N
, 0);
537 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
539 // Performs the following transformations:
540 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
541 // sign/zero-extension is completely overwritten by the new one performed by
543 // - Removes redundant zero extensions performed by an ISD::AND.
544 static SDValue
performANDCombine(SDNode
*N
, SelectionDAG
&DAG
,
545 TargetLowering::DAGCombinerInfo
&DCI
,
546 const MipsSubtarget
&Subtarget
) {
547 if (!Subtarget
.hasMSA())
550 SDValue Op0
= N
->getOperand(0);
551 SDValue Op1
= N
->getOperand(1);
552 unsigned Op0Opcode
= Op0
->getOpcode();
554 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
555 // where $d + 1 == 2^n and n == 32
556 // or $d + 1 == 2^n and n <= 32 and ZExt
557 // -> (MipsVExtractZExt $a, $b, $c)
558 if (Op0Opcode
== MipsISD::VEXTRACT_SEXT_ELT
||
559 Op0Opcode
== MipsISD::VEXTRACT_ZEXT_ELT
) {
560 ConstantSDNode
*Mask
= dyn_cast
<ConstantSDNode
>(Op1
);
565 int32_t Log2IfPositive
= (Mask
->getAPIntValue() + 1).exactLogBase2();
567 if (Log2IfPositive
<= 0)
568 return SDValue(); // Mask+1 is not a power of 2
570 SDValue Op0Op2
= Op0
->getOperand(2);
571 EVT ExtendTy
= cast
<VTSDNode
>(Op0Op2
)->getVT();
572 unsigned ExtendTySize
= ExtendTy
.getSizeInBits();
573 unsigned Log2
= Log2IfPositive
;
575 if ((Op0Opcode
== MipsISD::VEXTRACT_ZEXT_ELT
&& Log2
>= ExtendTySize
) ||
576 Log2
== ExtendTySize
) {
577 SDValue Ops
[] = { Op0
->getOperand(0), Op0
->getOperand(1), Op0Op2
};
578 return DAG
.getNode(MipsISD::VEXTRACT_ZEXT_ELT
, SDLoc(Op0
),
580 makeArrayRef(Ops
, Op0
->getNumOperands()));
587 // Determine if the specified node is a constant vector splat.
589 // Returns true and sets Imm if:
590 // * N is an ISD::BUILD_VECTOR representing a constant splat
592 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
593 // differences are that it assumes the MSA has already been checked and the
594 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and
595 // must not be in order for binsri.d to be selectable).
596 static bool isVSplat(SDValue N
, APInt
&Imm
, bool IsLittleEndian
) {
597 BuildVectorSDNode
*Node
= dyn_cast
<BuildVectorSDNode
>(N
.getNode());
602 APInt SplatValue
, SplatUndef
;
603 unsigned SplatBitSize
;
606 if (!Node
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
,
615 // Test whether the given node is an all-ones build_vector.
616 static bool isVectorAllOnes(SDValue N
) {
617 // Look through bitcasts. Endianness doesn't matter because we are looking
618 // for an all-ones value.
619 if (N
->getOpcode() == ISD::BITCAST
)
620 N
= N
->getOperand(0);
622 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(N
);
627 APInt SplatValue
, SplatUndef
;
628 unsigned SplatBitSize
;
631 // Endianness doesn't matter in this context because we are looking for
632 // an all-ones value.
633 if (BVN
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
))
634 return SplatValue
.isAllOnesValue();
639 // Test whether N is the bitwise inverse of OfNode.
640 static bool isBitwiseInverse(SDValue N
, SDValue OfNode
) {
641 if (N
->getOpcode() != ISD::XOR
)
644 if (isVectorAllOnes(N
->getOperand(0)))
645 return N
->getOperand(1) == OfNode
;
647 if (isVectorAllOnes(N
->getOperand(1)))
648 return N
->getOperand(0) == OfNode
;
653 // Perform combines where ISD::OR is the root node.
655 // Performs the following transformations:
656 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
657 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
659 static SDValue
performORCombine(SDNode
*N
, SelectionDAG
&DAG
,
660 TargetLowering::DAGCombinerInfo
&DCI
,
661 const MipsSubtarget
&Subtarget
) {
662 if (!Subtarget
.hasMSA())
665 EVT Ty
= N
->getValueType(0);
667 if (!Ty
.is128BitVector())
670 SDValue Op0
= N
->getOperand(0);
671 SDValue Op1
= N
->getOperand(1);
673 if (Op0
->getOpcode() == ISD::AND
&& Op1
->getOpcode() == ISD::AND
) {
674 SDValue Op0Op0
= Op0
->getOperand(0);
675 SDValue Op0Op1
= Op0
->getOperand(1);
676 SDValue Op1Op0
= Op1
->getOperand(0);
677 SDValue Op1Op1
= Op1
->getOperand(1);
678 bool IsLittleEndian
= !Subtarget
.isLittle();
680 SDValue IfSet
, IfClr
, Cond
;
681 bool IsConstantMask
= false;
684 // If Op0Op0 is an appropriate mask, try to find its inverse in either
685 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
687 // IfClr will be set if we find a valid match.
688 if (isVSplat(Op0Op0
, Mask
, IsLittleEndian
)) {
692 if (isVSplat(Op1Op0
, InvMask
, IsLittleEndian
) &&
693 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
695 else if (isVSplat(Op1Op1
, InvMask
, IsLittleEndian
) &&
696 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
699 IsConstantMask
= true;
702 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
703 // thing again using this mask.
704 // IfClr will be set if we find a valid match.
705 if (!IfClr
.getNode() && isVSplat(Op0Op1
, Mask
, IsLittleEndian
)) {
709 if (isVSplat(Op1Op0
, InvMask
, IsLittleEndian
) &&
710 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
712 else if (isVSplat(Op1Op1
, InvMask
, IsLittleEndian
) &&
713 Mask
.getBitWidth() == InvMask
.getBitWidth() && Mask
== ~InvMask
)
716 IsConstantMask
= true;
719 // If IfClr is not yet set, try looking for a non-constant match.
720 // IfClr will be set if we find a valid match amongst the eight
722 if (!IfClr
.getNode()) {
723 if (isBitwiseInverse(Op0Op0
, Op1Op0
)) {
727 } else if (isBitwiseInverse(Op0Op1
, Op1Op0
)) {
731 } else if (isBitwiseInverse(Op0Op0
, Op1Op1
)) {
735 } else if (isBitwiseInverse(Op0Op1
, Op1Op1
)) {
739 } else if (isBitwiseInverse(Op1Op0
, Op0Op0
)) {
743 } else if (isBitwiseInverse(Op1Op1
, Op0Op0
)) {
747 } else if (isBitwiseInverse(Op1Op0
, Op0Op1
)) {
751 } else if (isBitwiseInverse(Op1Op1
, Op0Op1
)) {
758 // At this point, IfClr will be set if we have a valid match.
759 if (!IfClr
.getNode())
762 assert(Cond
.getNode() && IfSet
.getNode());
764 // Fold degenerate cases.
765 if (IsConstantMask
) {
766 if (Mask
.isAllOnesValue())
772 // Transform the DAG into an equivalent VSELECT.
773 return DAG
.getNode(ISD::VSELECT
, SDLoc(N
), Ty
, Cond
, IfSet
, IfClr
);
779 static SDValue
performSUBECombine(SDNode
*N
, SelectionDAG
&DAG
,
780 TargetLowering::DAGCombinerInfo
&DCI
,
781 const MipsSubtarget
&Subtarget
) {
782 if (DCI
.isBeforeLegalize())
785 if (Subtarget
.hasMips32() && N
->getValueType(0) == MVT::i32
&&
787 return SDValue(N
, 0);
792 static SDValue
genConstMult(SDValue X
, uint64_t C
, SDLoc DL
, EVT VT
,
793 EVT ShiftTy
, SelectionDAG
&DAG
) {
794 // Clear the upper (64 - VT.sizeInBits) bits.
795 C
&= ((uint64_t)-1) >> (64 - VT
.getSizeInBits());
799 return DAG
.getConstant(0, VT
);
805 // If c is power of 2, return (shl x, log2(c)).
806 if (isPowerOf2_64(C
))
807 return DAG
.getNode(ISD::SHL
, DL
, VT
, X
,
808 DAG
.getConstant(Log2_64(C
), ShiftTy
));
810 unsigned Log2Ceil
= Log2_64_Ceil(C
);
811 uint64_t Floor
= 1LL << Log2_64(C
);
812 uint64_t Ceil
= Log2Ceil
== 64 ? 0LL : 1LL << Log2Ceil
;
814 // If |c - floor_c| <= |c - ceil_c|,
815 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
816 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
817 if (C
- Floor
<= Ceil
- C
) {
818 SDValue Op0
= genConstMult(X
, Floor
, DL
, VT
, ShiftTy
, DAG
);
819 SDValue Op1
= genConstMult(X
, C
- Floor
, DL
, VT
, ShiftTy
, DAG
);
820 return DAG
.getNode(ISD::ADD
, DL
, VT
, Op0
, Op1
);
823 // If |c - floor_c| > |c - ceil_c|,
824 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
825 SDValue Op0
= genConstMult(X
, Ceil
, DL
, VT
, ShiftTy
, DAG
);
826 SDValue Op1
= genConstMult(X
, Ceil
- C
, DL
, VT
, ShiftTy
, DAG
);
827 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op0
, Op1
);
830 static SDValue
performMULCombine(SDNode
*N
, SelectionDAG
&DAG
,
831 const TargetLowering::DAGCombinerInfo
&DCI
,
832 const MipsSETargetLowering
*TL
) {
833 EVT VT
= N
->getValueType(0);
835 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
837 return genConstMult(N
->getOperand(0), C
->getZExtValue(), SDLoc(N
),
838 VT
, TL
->getScalarShiftAmountTy(VT
), DAG
);
840 return SDValue(N
, 0);
843 static SDValue
performDSPShiftCombine(unsigned Opc
, SDNode
*N
, EVT Ty
,
845 const MipsSubtarget
&Subtarget
) {
846 // See if this is a vector splat immediate node.
847 APInt SplatValue
, SplatUndef
;
848 unsigned SplatBitSize
;
850 unsigned EltSize
= Ty
.getVectorElementType().getSizeInBits();
851 BuildVectorSDNode
*BV
= dyn_cast
<BuildVectorSDNode
>(N
->getOperand(1));
853 if (!Subtarget
.hasDSP())
857 !BV
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
, HasAnyUndefs
,
858 EltSize
, !Subtarget
.isLittle()) ||
859 (SplatBitSize
!= EltSize
) ||
860 (SplatValue
.getZExtValue() >= EltSize
))
863 return DAG
.getNode(Opc
, SDLoc(N
), Ty
, N
->getOperand(0),
864 DAG
.getConstant(SplatValue
.getZExtValue(), MVT::i32
));
867 static SDValue
performSHLCombine(SDNode
*N
, SelectionDAG
&DAG
,
868 TargetLowering::DAGCombinerInfo
&DCI
,
869 const MipsSubtarget
&Subtarget
) {
870 EVT Ty
= N
->getValueType(0);
872 if ((Ty
!= MVT::v2i16
) && (Ty
!= MVT::v4i8
))
875 return performDSPShiftCombine(MipsISD::SHLL_DSP
, N
, Ty
, DAG
, Subtarget
);
878 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
879 // constant splats into MipsISD::SHRA_DSP for DSPr2.
881 // Performs the following transformations:
882 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
883 // sign/zero-extension is completely overwritten by the new one performed by
884 // the ISD::SRA and ISD::SHL nodes.
885 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
888 // See performDSPShiftCombine for more information about the transformation
890 static SDValue
performSRACombine(SDNode
*N
, SelectionDAG
&DAG
,
891 TargetLowering::DAGCombinerInfo
&DCI
,
892 const MipsSubtarget
&Subtarget
) {
893 EVT Ty
= N
->getValueType(0);
895 if (Subtarget
.hasMSA()) {
896 SDValue Op0
= N
->getOperand(0);
897 SDValue Op1
= N
->getOperand(1);
899 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
900 // where $d + sizeof($c) == 32
901 // or $d + sizeof($c) <= 32 and SExt
902 // -> (MipsVExtractSExt $a, $b, $c)
903 if (Op0
->getOpcode() == ISD::SHL
&& Op1
== Op0
->getOperand(1)) {
904 SDValue Op0Op0
= Op0
->getOperand(0);
905 ConstantSDNode
*ShAmount
= dyn_cast
<ConstantSDNode
>(Op1
);
910 if (Op0Op0
->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT
&&
911 Op0Op0
->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT
)
914 EVT ExtendTy
= cast
<VTSDNode
>(Op0Op0
->getOperand(2))->getVT();
915 unsigned TotalBits
= ShAmount
->getZExtValue() + ExtendTy
.getSizeInBits();
917 if (TotalBits
== 32 ||
918 (Op0Op0
->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT
&&
920 SDValue Ops
[] = { Op0Op0
->getOperand(0), Op0Op0
->getOperand(1),
921 Op0Op0
->getOperand(2) };
922 return DAG
.getNode(MipsISD::VEXTRACT_SEXT_ELT
, SDLoc(Op0Op0
),
924 makeArrayRef(Ops
, Op0Op0
->getNumOperands()));
929 if ((Ty
!= MVT::v2i16
) && ((Ty
!= MVT::v4i8
) || !Subtarget
.hasDSPR2()))
932 return performDSPShiftCombine(MipsISD::SHRA_DSP
, N
, Ty
, DAG
, Subtarget
);
936 static SDValue
performSRLCombine(SDNode
*N
, SelectionDAG
&DAG
,
937 TargetLowering::DAGCombinerInfo
&DCI
,
938 const MipsSubtarget
&Subtarget
) {
939 EVT Ty
= N
->getValueType(0);
941 if (((Ty
!= MVT::v2i16
) || !Subtarget
.hasDSPR2()) && (Ty
!= MVT::v4i8
))
944 return performDSPShiftCombine(MipsISD::SHRL_DSP
, N
, Ty
, DAG
, Subtarget
);
947 static bool isLegalDSPCondCode(EVT Ty
, ISD::CondCode CC
) {
948 bool IsV216
= (Ty
== MVT::v2i16
);
952 case ISD::SETNE
: return true;
956 case ISD::SETGE
: return IsV216
;
960 case ISD::SETUGE
: return !IsV216
;
961 default: return false;
965 static SDValue
performSETCCCombine(SDNode
*N
, SelectionDAG
&DAG
) {
966 EVT Ty
= N
->getValueType(0);
968 if ((Ty
!= MVT::v2i16
) && (Ty
!= MVT::v4i8
))
971 if (!isLegalDSPCondCode(Ty
, cast
<CondCodeSDNode
>(N
->getOperand(2))->get()))
974 return DAG
.getNode(MipsISD::SETCC_DSP
, SDLoc(N
), Ty
, N
->getOperand(0),
975 N
->getOperand(1), N
->getOperand(2));
978 static SDValue
performVSELECTCombine(SDNode
*N
, SelectionDAG
&DAG
) {
979 EVT Ty
= N
->getValueType(0);
981 if (Ty
.is128BitVector() && Ty
.isInteger()) {
982 // Try the following combines:
983 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b)
984 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b)
985 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b)
986 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b)
987 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b)
988 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b)
989 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b)
990 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b)
991 // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
992 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
994 SDValue Op0
= N
->getOperand(0);
996 if (Op0
->getOpcode() != ISD::SETCC
)
999 ISD::CondCode CondCode
= cast
<CondCodeSDNode
>(Op0
->getOperand(2))->get();
1002 if (CondCode
== ISD::SETLT
|| CondCode
== ISD::SETLE
)
1004 else if (CondCode
== ISD::SETULT
|| CondCode
== ISD::SETULE
)
1009 SDValue Op1
= N
->getOperand(1);
1010 SDValue Op2
= N
->getOperand(2);
1011 SDValue Op0Op0
= Op0
->getOperand(0);
1012 SDValue Op0Op1
= Op0
->getOperand(1);
1014 if (Op1
== Op0Op0
&& Op2
== Op0Op1
)
1015 return DAG
.getNode(Signed
? MipsISD::VSMIN
: MipsISD::VUMIN
, SDLoc(N
),
1017 else if (Op1
== Op0Op1
&& Op2
== Op0Op0
)
1018 return DAG
.getNode(Signed
? MipsISD::VSMAX
: MipsISD::VUMAX
, SDLoc(N
),
1020 } else if ((Ty
== MVT::v2i16
) || (Ty
== MVT::v4i8
)) {
1021 SDValue SetCC
= N
->getOperand(0);
1023 if (SetCC
.getOpcode() != MipsISD::SETCC_DSP
)
1026 return DAG
.getNode(MipsISD::SELECT_CC_DSP
, SDLoc(N
), Ty
,
1027 SetCC
.getOperand(0), SetCC
.getOperand(1),
1028 N
->getOperand(1), N
->getOperand(2), SetCC
.getOperand(2));
1034 static SDValue
performXORCombine(SDNode
*N
, SelectionDAG
&DAG
,
1035 const MipsSubtarget
&Subtarget
) {
1036 EVT Ty
= N
->getValueType(0);
1038 if (Subtarget
.hasMSA() && Ty
.is128BitVector() && Ty
.isInteger()) {
1039 // Try the following combines:
1040 // (xor (or $a, $b), (build_vector allones))
1041 // (xor (or $a, $b), (bitcast (build_vector allones)))
1042 SDValue Op0
= N
->getOperand(0);
1043 SDValue Op1
= N
->getOperand(1);
1046 if (ISD::isBuildVectorAllOnes(Op0
.getNode()))
1048 else if (ISD::isBuildVectorAllOnes(Op1
.getNode()))
1053 if (NotOp
->getOpcode() == ISD::OR
)
1054 return DAG
.getNode(MipsISD::VNOR
, SDLoc(N
), Ty
, NotOp
->getOperand(0),
1055 NotOp
->getOperand(1));
1062 MipsSETargetLowering::PerformDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
1063 SelectionDAG
&DAG
= DCI
.DAG
;
1066 switch (N
->getOpcode()) {
1068 return performADDECombine(N
, DAG
, DCI
, Subtarget
);
1070 Val
= performANDCombine(N
, DAG
, DCI
, Subtarget
);
1073 Val
= performORCombine(N
, DAG
, DCI
, Subtarget
);
1076 return performSUBECombine(N
, DAG
, DCI
, Subtarget
);
1078 return performMULCombine(N
, DAG
, DCI
, this);
1080 return performSHLCombine(N
, DAG
, DCI
, Subtarget
);
1082 return performSRACombine(N
, DAG
, DCI
, Subtarget
);
1084 return performSRLCombine(N
, DAG
, DCI
, Subtarget
);
1086 return performVSELECTCombine(N
, DAG
);
1088 Val
= performXORCombine(N
, DAG
, Subtarget
);
1091 Val
= performSETCCCombine(N
, DAG
);
1095 if (Val
.getNode()) {
1096 DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1097 N
->printrWithDepth(dbgs(), &DAG
);
1098 dbgs() << "\n=> \n";
1099 Val
.getNode()->printrWithDepth(dbgs(), &DAG
);
1104 return MipsTargetLowering::PerformDAGCombine(N
, DCI
);
1108 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr
*MI
,
1109 MachineBasicBlock
*BB
) const {
1110 switch (MI
->getOpcode()) {
1112 return MipsTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
1113 case Mips::BPOSGE32_PSEUDO
:
1114 return emitBPOSGE32(MI
, BB
);
1115 case Mips::SNZ_B_PSEUDO
:
1116 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_B
);
1117 case Mips::SNZ_H_PSEUDO
:
1118 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_H
);
1119 case Mips::SNZ_W_PSEUDO
:
1120 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_W
);
1121 case Mips::SNZ_D_PSEUDO
:
1122 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_D
);
1123 case Mips::SNZ_V_PSEUDO
:
1124 return emitMSACBranchPseudo(MI
, BB
, Mips::BNZ_V
);
1125 case Mips::SZ_B_PSEUDO
:
1126 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_B
);
1127 case Mips::SZ_H_PSEUDO
:
1128 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_H
);
1129 case Mips::SZ_W_PSEUDO
:
1130 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_W
);
1131 case Mips::SZ_D_PSEUDO
:
1132 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_D
);
1133 case Mips::SZ_V_PSEUDO
:
1134 return emitMSACBranchPseudo(MI
, BB
, Mips::BZ_V
);
1135 case Mips::COPY_FW_PSEUDO
:
1136 return emitCOPY_FW(MI
, BB
);
1137 case Mips::COPY_FD_PSEUDO
:
1138 return emitCOPY_FD(MI
, BB
);
1139 case Mips::INSERT_FW_PSEUDO
:
1140 return emitINSERT_FW(MI
, BB
);
1141 case Mips::INSERT_FD_PSEUDO
:
1142 return emitINSERT_FD(MI
, BB
);
1143 case Mips::INSERT_B_VIDX_PSEUDO
:
1144 return emitINSERT_DF_VIDX(MI
, BB
, 1, false);
1145 case Mips::INSERT_H_VIDX_PSEUDO
:
1146 return emitINSERT_DF_VIDX(MI
, BB
, 2, false);
1147 case Mips::INSERT_W_VIDX_PSEUDO
:
1148 return emitINSERT_DF_VIDX(MI
, BB
, 4, false);
1149 case Mips::INSERT_D_VIDX_PSEUDO
:
1150 return emitINSERT_DF_VIDX(MI
, BB
, 8, false);
1151 case Mips::INSERT_FW_VIDX_PSEUDO
:
1152 return emitINSERT_DF_VIDX(MI
, BB
, 4, true);
1153 case Mips::INSERT_FD_VIDX_PSEUDO
:
1154 return emitINSERT_DF_VIDX(MI
, BB
, 8, true);
1155 case Mips::FILL_FW_PSEUDO
:
1156 return emitFILL_FW(MI
, BB
);
1157 case Mips::FILL_FD_PSEUDO
:
1158 return emitFILL_FD(MI
, BB
);
1159 case Mips::FEXP2_W_1_PSEUDO
:
1160 return emitFEXP2_W_1(MI
, BB
);
1161 case Mips::FEXP2_D_1_PSEUDO
:
1162 return emitFEXP2_D_1(MI
, BB
);
1166 bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1167 const CCState
&CCInfo
, unsigned NextStackOffset
,
1168 const MipsFunctionInfo
&FI
) const {
1169 if (!EnableMipsTailCalls
)
1172 // Return false if either the callee or caller has a byval argument.
1173 if (CCInfo
.getInRegsParamsCount() > 0 || FI
.hasByvalArg())
1176 // Return true if the callee's argument area is no larger than the
1178 return NextStackOffset
<= FI
.getIncomingArgSize();
1181 void MipsSETargetLowering::
1182 getOpndList(SmallVectorImpl
<SDValue
> &Ops
,
1183 std::deque
< std::pair
<unsigned, SDValue
> > &RegsToPass
,
1184 bool IsPICCall
, bool GlobalOrExternal
, bool InternalLinkage
,
1185 bool IsCallReloc
, CallLoweringInfo
&CLI
, SDValue Callee
,
1186 SDValue Chain
) const {
1187 Ops
.push_back(Callee
);
1188 MipsTargetLowering::getOpndList(Ops
, RegsToPass
, IsPICCall
, GlobalOrExternal
,
1189 InternalLinkage
, IsCallReloc
, CLI
, Callee
,
1193 SDValue
MipsSETargetLowering::lowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
1194 LoadSDNode
&Nd
= *cast
<LoadSDNode
>(Op
);
1196 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1197 return MipsTargetLowering::lowerLOAD(Op
, DAG
);
1199 // Replace a double precision load with two i32 loads and a buildpair64.
1201 SDValue Ptr
= Nd
.getBasePtr(), Chain
= Nd
.getChain();
1202 EVT PtrVT
= Ptr
.getValueType();
1204 // i32 load from lower address.
1205 SDValue Lo
= DAG
.getLoad(MVT::i32
, DL
, Chain
, Ptr
,
1206 MachinePointerInfo(), Nd
.isVolatile(),
1207 Nd
.isNonTemporal(), Nd
.isInvariant(),
1210 // i32 load from higher address.
1211 Ptr
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Ptr
, DAG
.getConstant(4, PtrVT
));
1212 SDValue Hi
= DAG
.getLoad(MVT::i32
, DL
, Lo
.getValue(1), Ptr
,
1213 MachinePointerInfo(), Nd
.isVolatile(),
1214 Nd
.isNonTemporal(), Nd
.isInvariant(),
1215 std::min(Nd
.getAlignment(), 4U));
1217 if (!Subtarget
.isLittle())
1220 SDValue BP
= DAG
.getNode(MipsISD::BuildPairF64
, DL
, MVT::f64
, Lo
, Hi
);
1221 SDValue Ops
[2] = {BP
, Hi
.getValue(1)};
1222 return DAG
.getMergeValues(Ops
, DL
);
1225 SDValue
MipsSETargetLowering::lowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const {
1226 StoreSDNode
&Nd
= *cast
<StoreSDNode
>(Op
);
1228 if (Nd
.getMemoryVT() != MVT::f64
|| !NoDPLoadStore
)
1229 return MipsTargetLowering::lowerSTORE(Op
, DAG
);
1231 // Replace a double precision store with two extractelement64s and i32 stores.
1233 SDValue Val
= Nd
.getValue(), Ptr
= Nd
.getBasePtr(), Chain
= Nd
.getChain();
1234 EVT PtrVT
= Ptr
.getValueType();
1235 SDValue Lo
= DAG
.getNode(MipsISD::ExtractElementF64
, DL
, MVT::i32
,
1236 Val
, DAG
.getConstant(0, MVT::i32
));
1237 SDValue Hi
= DAG
.getNode(MipsISD::ExtractElementF64
, DL
, MVT::i32
,
1238 Val
, DAG
.getConstant(1, MVT::i32
));
1240 if (!Subtarget
.isLittle())
1243 // i32 store to lower address.
1244 Chain
= DAG
.getStore(Chain
, DL
, Lo
, Ptr
, MachinePointerInfo(),
1245 Nd
.isVolatile(), Nd
.isNonTemporal(), Nd
.getAlignment(),
1248 // i32 store to higher address.
1249 Ptr
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Ptr
, DAG
.getConstant(4, PtrVT
));
1250 return DAG
.getStore(Chain
, DL
, Hi
, Ptr
, MachinePointerInfo(),
1251 Nd
.isVolatile(), Nd
.isNonTemporal(),
1252 std::min(Nd
.getAlignment(), 4U), Nd
.getAAInfo());
1255 SDValue
MipsSETargetLowering::lowerMulDiv(SDValue Op
, unsigned NewOpc
,
1256 bool HasLo
, bool HasHi
,
1257 SelectionDAG
&DAG
) const {
1258 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1259 assert(!Subtarget
.hasMips32r6());
1261 EVT Ty
= Op
.getOperand(0).getValueType();
1263 SDValue Mult
= DAG
.getNode(NewOpc
, DL
, MVT::Untyped
,
1264 Op
.getOperand(0), Op
.getOperand(1));
1268 Lo
= DAG
.getNode(MipsISD::MFLO
, DL
, Ty
, Mult
);
1270 Hi
= DAG
.getNode(MipsISD::MFHI
, DL
, Ty
, Mult
);
1272 if (!HasLo
|| !HasHi
)
1273 return HasLo
? Lo
: Hi
;
1275 SDValue Vals
[] = { Lo
, Hi
};
1276 return DAG
.getMergeValues(Vals
, DL
);
1280 static SDValue
initAccumulator(SDValue In
, SDLoc DL
, SelectionDAG
&DAG
) {
1281 SDValue InLo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, DL
, MVT::i32
, In
,
1282 DAG
.getConstant(0, MVT::i32
));
1283 SDValue InHi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, DL
, MVT::i32
, In
,
1284 DAG
.getConstant(1, MVT::i32
));
1285 return DAG
.getNode(MipsISD::MTLOHI
, DL
, MVT::Untyped
, InLo
, InHi
);
1288 static SDValue
extractLOHI(SDValue Op
, SDLoc DL
, SelectionDAG
&DAG
) {
1289 SDValue Lo
= DAG
.getNode(MipsISD::MFLO
, DL
, MVT::i32
, Op
);
1290 SDValue Hi
= DAG
.getNode(MipsISD::MFHI
, DL
, MVT::i32
, Op
);
1291 return DAG
.getNode(ISD::BUILD_PAIR
, DL
, MVT::i64
, Lo
, Hi
);
1294 // This function expands mips intrinsic nodes which have 64-bit input operands
1295 // or output values.
1297 // out64 = intrinsic-node in64
1299 // lo = copy (extract-element (in64, 0))
1300 // hi = copy (extract-element (in64, 1))
1301 // mips-specific-node
1304 // out64 = merge-values (v0, v1)
1306 static SDValue
lowerDSPIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Opc
) {
1308 bool HasChainIn
= Op
->getOperand(0).getValueType() == MVT::Other
;
1309 SmallVector
<SDValue
, 3> Ops
;
1312 // See if Op has a chain input.
1314 Ops
.push_back(Op
->getOperand(OpNo
++));
1316 // The next operand is the intrinsic opcode.
1317 assert(Op
->getOperand(OpNo
).getOpcode() == ISD::TargetConstant
);
1319 // See if the next operand has type i64.
1320 SDValue Opnd
= Op
->getOperand(++OpNo
), In64
;
1322 if (Opnd
.getValueType() == MVT::i64
)
1323 In64
= initAccumulator(Opnd
, DL
, DAG
);
1325 Ops
.push_back(Opnd
);
1327 // Push the remaining operands.
1328 for (++OpNo
; OpNo
< Op
->getNumOperands(); ++OpNo
)
1329 Ops
.push_back(Op
->getOperand(OpNo
));
1331 // Add In64 to the end of the list.
1333 Ops
.push_back(In64
);
1336 SmallVector
<EVT
, 2> ResTys
;
1338 for (SDNode::value_iterator I
= Op
->value_begin(), E
= Op
->value_end();
1340 ResTys
.push_back((*I
== MVT::i64
) ? MVT::Untyped
: *I
);
1343 SDValue Val
= DAG
.getNode(Opc
, DL
, ResTys
, Ops
);
1344 SDValue Out
= (ResTys
[0] == MVT::Untyped
) ? extractLOHI(Val
, DL
, DAG
) : Val
;
1349 assert(Val
->getValueType(1) == MVT::Other
);
1350 SDValue Vals
[] = { Out
, SDValue(Val
.getNode(), 1) };
1351 return DAG
.getMergeValues(Vals
, DL
);
1354 // Lower an MSA copy intrinsic into the specified SelectionDAG node
1355 static SDValue
lowerMSACopyIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Opc
) {
1357 SDValue Vec
= Op
->getOperand(1);
1358 SDValue Idx
= Op
->getOperand(2);
1359 EVT ResTy
= Op
->getValueType(0);
1360 EVT EltTy
= Vec
->getValueType(0).getVectorElementType();
1362 SDValue Result
= DAG
.getNode(Opc
, DL
, ResTy
, Vec
, Idx
,
1363 DAG
.getValueType(EltTy
));
1368 static SDValue
lowerMSASplatZExt(SDValue Op
, unsigned OpNr
, SelectionDAG
&DAG
) {
1369 EVT ResVecTy
= Op
->getValueType(0);
1370 EVT ViaVecTy
= ResVecTy
;
1373 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1374 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
1377 SDValue LaneB
= Op
->getOperand(2);
1379 if (ResVecTy
== MVT::v2i64
) {
1380 LaneA
= DAG
.getConstant(0, MVT::i32
);
1381 ViaVecTy
= MVT::v4i32
;
1385 SDValue Ops
[16] = { LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
,
1386 LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
, LaneA
, LaneB
};
1388 SDValue Result
= DAG
.getNode(ISD::BUILD_VECTOR
, DL
, ViaVecTy
,
1389 makeArrayRef(Ops
, ViaVecTy
.getVectorNumElements()));
1391 if (ViaVecTy
!= ResVecTy
)
1392 Result
= DAG
.getNode(ISD::BITCAST
, DL
, ResVecTy
, Result
);
1397 static SDValue
lowerMSASplatImm(SDValue Op
, unsigned ImmOp
, SelectionDAG
&DAG
) {
1398 return DAG
.getConstant(Op
->getConstantOperandVal(ImmOp
), Op
->getValueType(0));
1401 static SDValue
getBuildVectorSplat(EVT VecTy
, SDValue SplatValue
,
1402 bool BigEndian
, SelectionDAG
&DAG
) {
1403 EVT ViaVecTy
= VecTy
;
1404 SDValue SplatValueA
= SplatValue
;
1405 SDValue SplatValueB
= SplatValue
;
1406 SDLoc
DL(SplatValue
);
1408 if (VecTy
== MVT::v2i64
) {
1409 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
1410 ViaVecTy
= MVT::v4i32
;
1412 SplatValueA
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, SplatValue
);
1413 SplatValueB
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, SplatValue
,
1414 DAG
.getConstant(32, MVT::i32
));
1415 SplatValueB
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, SplatValueB
);
1418 // We currently hold the parts in little endian order. Swap them if
1421 std::swap(SplatValueA
, SplatValueB
);
1423 SDValue Ops
[16] = { SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1424 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1425 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
,
1426 SplatValueA
, SplatValueB
, SplatValueA
, SplatValueB
};
1428 SDValue Result
= DAG
.getNode(ISD::BUILD_VECTOR
, DL
, ViaVecTy
,
1429 makeArrayRef(Ops
, ViaVecTy
.getVectorNumElements()));
1431 if (VecTy
!= ViaVecTy
)
1432 Result
= DAG
.getNode(ISD::BITCAST
, DL
, VecTy
, Result
);
1437 static SDValue
lowerMSABinaryBitImmIntr(SDValue Op
, SelectionDAG
&DAG
,
1438 unsigned Opc
, SDValue Imm
,
1440 EVT VecTy
= Op
->getValueType(0);
1444 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1446 if (VecTy
== MVT::v2i64
) {
1447 if (ConstantSDNode
*CImm
= dyn_cast
<ConstantSDNode
>(Imm
)) {
1448 APInt BitImm
= APInt(64, 1) << CImm
->getAPIntValue();
1450 SDValue BitImmHiOp
= DAG
.getConstant(BitImm
.lshr(32).trunc(32), MVT::i32
);
1451 SDValue BitImmLoOp
= DAG
.getConstant(BitImm
.trunc(32), MVT::i32
);
1454 std::swap(BitImmLoOp
, BitImmHiOp
);
1457 DAG
.getNode(ISD::BITCAST
, DL
, MVT::v2i64
,
1458 DAG
.getNode(ISD::BUILD_VECTOR
, DL
, MVT::v4i32
, BitImmLoOp
,
1459 BitImmHiOp
, BitImmLoOp
, BitImmHiOp
));
1463 if (!Exp2Imm
.getNode()) {
1464 // We couldnt constant fold, do a vector shift instead
1466 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1467 // only values 0-63 are valid.
1468 if (VecTy
== MVT::v2i64
)
1469 Imm
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, Imm
);
1471 Exp2Imm
= getBuildVectorSplat(VecTy
, Imm
, BigEndian
, DAG
);
1474 DAG
.getNode(ISD::SHL
, DL
, VecTy
, DAG
.getConstant(1, VecTy
), Exp2Imm
);
1477 return DAG
.getNode(Opc
, DL
, VecTy
, Op
->getOperand(1), Exp2Imm
);
1480 static SDValue
lowerMSABitClear(SDValue Op
, SelectionDAG
&DAG
) {
1481 EVT ResTy
= Op
->getValueType(0);
1483 SDValue One
= DAG
.getConstant(1, ResTy
);
1484 SDValue Bit
= DAG
.getNode(ISD::SHL
, DL
, ResTy
, One
, Op
->getOperand(2));
1486 return DAG
.getNode(ISD::AND
, DL
, ResTy
, Op
->getOperand(1),
1487 DAG
.getNOT(DL
, Bit
, ResTy
));
1490 static SDValue
lowerMSABitClearImm(SDValue Op
, SelectionDAG
&DAG
) {
1492 EVT ResTy
= Op
->getValueType(0);
1493 APInt BitImm
= APInt(ResTy
.getVectorElementType().getSizeInBits(), 1)
1494 << cast
<ConstantSDNode
>(Op
->getOperand(2))->getAPIntValue();
1495 SDValue BitMask
= DAG
.getConstant(~BitImm
, ResTy
);
1497 return DAG
.getNode(ISD::AND
, DL
, ResTy
, Op
->getOperand(1), BitMask
);
1500 SDValue
MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
1501 SelectionDAG
&DAG
) const {
1504 switch (cast
<ConstantSDNode
>(Op
->getOperand(0))->getZExtValue()) {
1507 case Intrinsic::mips_shilo
:
1508 return lowerDSPIntr(Op
, DAG
, MipsISD::SHILO
);
1509 case Intrinsic::mips_dpau_h_qbl
:
1510 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBL
);
1511 case Intrinsic::mips_dpau_h_qbr
:
1512 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAU_H_QBR
);
1513 case Intrinsic::mips_dpsu_h_qbl
:
1514 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBL
);
1515 case Intrinsic::mips_dpsu_h_qbr
:
1516 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSU_H_QBR
);
1517 case Intrinsic::mips_dpa_w_ph
:
1518 return lowerDSPIntr(Op
, DAG
, MipsISD::DPA_W_PH
);
1519 case Intrinsic::mips_dps_w_ph
:
1520 return lowerDSPIntr(Op
, DAG
, MipsISD::DPS_W_PH
);
1521 case Intrinsic::mips_dpax_w_ph
:
1522 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAX_W_PH
);
1523 case Intrinsic::mips_dpsx_w_ph
:
1524 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSX_W_PH
);
1525 case Intrinsic::mips_mulsa_w_ph
:
1526 return lowerDSPIntr(Op
, DAG
, MipsISD::MULSA_W_PH
);
1527 case Intrinsic::mips_mult
:
1528 return lowerDSPIntr(Op
, DAG
, MipsISD::Mult
);
1529 case Intrinsic::mips_multu
:
1530 return lowerDSPIntr(Op
, DAG
, MipsISD::Multu
);
1531 case Intrinsic::mips_madd
:
1532 return lowerDSPIntr(Op
, DAG
, MipsISD::MAdd
);
1533 case Intrinsic::mips_maddu
:
1534 return lowerDSPIntr(Op
, DAG
, MipsISD::MAddu
);
1535 case Intrinsic::mips_msub
:
1536 return lowerDSPIntr(Op
, DAG
, MipsISD::MSub
);
1537 case Intrinsic::mips_msubu
:
1538 return lowerDSPIntr(Op
, DAG
, MipsISD::MSubu
);
1539 case Intrinsic::mips_addv_b
:
1540 case Intrinsic::mips_addv_h
:
1541 case Intrinsic::mips_addv_w
:
1542 case Intrinsic::mips_addv_d
:
1543 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1545 case Intrinsic::mips_addvi_b
:
1546 case Intrinsic::mips_addvi_h
:
1547 case Intrinsic::mips_addvi_w
:
1548 case Intrinsic::mips_addvi_d
:
1549 return DAG
.getNode(ISD::ADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1550 lowerMSASplatImm(Op
, 2, DAG
));
1551 case Intrinsic::mips_and_v
:
1552 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1554 case Intrinsic::mips_andi_b
:
1555 return DAG
.getNode(ISD::AND
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1556 lowerMSASplatImm(Op
, 2, DAG
));
1557 case Intrinsic::mips_bclr_b
:
1558 case Intrinsic::mips_bclr_h
:
1559 case Intrinsic::mips_bclr_w
:
1560 case Intrinsic::mips_bclr_d
:
1561 return lowerMSABitClear(Op
, DAG
);
1562 case Intrinsic::mips_bclri_b
:
1563 case Intrinsic::mips_bclri_h
:
1564 case Intrinsic::mips_bclri_w
:
1565 case Intrinsic::mips_bclri_d
:
1566 return lowerMSABitClearImm(Op
, DAG
);
1567 case Intrinsic::mips_binsli_b
:
1568 case Intrinsic::mips_binsli_h
:
1569 case Intrinsic::mips_binsli_w
:
1570 case Intrinsic::mips_binsli_d
: {
1571 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1572 EVT VecTy
= Op
->getValueType(0);
1573 EVT EltTy
= VecTy
.getVectorElementType();
1574 APInt Mask
= APInt::getHighBitsSet(EltTy
.getSizeInBits(),
1575 Op
->getConstantOperandVal(3));
1576 return DAG
.getNode(ISD::VSELECT
, DL
, VecTy
,
1577 DAG
.getConstant(Mask
, VecTy
, true), Op
->getOperand(2),
1580 case Intrinsic::mips_binsri_b
:
1581 case Intrinsic::mips_binsri_h
:
1582 case Intrinsic::mips_binsri_w
:
1583 case Intrinsic::mips_binsri_d
: {
1584 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1585 EVT VecTy
= Op
->getValueType(0);
1586 EVT EltTy
= VecTy
.getVectorElementType();
1587 APInt Mask
= APInt::getLowBitsSet(EltTy
.getSizeInBits(),
1588 Op
->getConstantOperandVal(3));
1589 return DAG
.getNode(ISD::VSELECT
, DL
, VecTy
,
1590 DAG
.getConstant(Mask
, VecTy
, true), Op
->getOperand(2),
1593 case Intrinsic::mips_bmnz_v
:
1594 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0), Op
->getOperand(3),
1595 Op
->getOperand(2), Op
->getOperand(1));
1596 case Intrinsic::mips_bmnzi_b
:
1597 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1598 lowerMSASplatImm(Op
, 3, DAG
), Op
->getOperand(2),
1600 case Intrinsic::mips_bmz_v
:
1601 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0), Op
->getOperand(3),
1602 Op
->getOperand(1), Op
->getOperand(2));
1603 case Intrinsic::mips_bmzi_b
:
1604 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1605 lowerMSASplatImm(Op
, 3, DAG
), Op
->getOperand(1),
1607 case Intrinsic::mips_bneg_b
:
1608 case Intrinsic::mips_bneg_h
:
1609 case Intrinsic::mips_bneg_w
:
1610 case Intrinsic::mips_bneg_d
: {
1611 EVT VecTy
= Op
->getValueType(0);
1612 SDValue One
= DAG
.getConstant(1, VecTy
);
1614 return DAG
.getNode(ISD::XOR
, DL
, VecTy
, Op
->getOperand(1),
1615 DAG
.getNode(ISD::SHL
, DL
, VecTy
, One
,
1616 Op
->getOperand(2)));
1618 case Intrinsic::mips_bnegi_b
:
1619 case Intrinsic::mips_bnegi_h
:
1620 case Intrinsic::mips_bnegi_w
:
1621 case Intrinsic::mips_bnegi_d
:
1622 return lowerMSABinaryBitImmIntr(Op
, DAG
, ISD::XOR
, Op
->getOperand(2),
1623 !Subtarget
.isLittle());
1624 case Intrinsic::mips_bnz_b
:
1625 case Intrinsic::mips_bnz_h
:
1626 case Intrinsic::mips_bnz_w
:
1627 case Intrinsic::mips_bnz_d
:
1628 return DAG
.getNode(MipsISD::VALL_NONZERO
, DL
, Op
->getValueType(0),
1630 case Intrinsic::mips_bnz_v
:
1631 return DAG
.getNode(MipsISD::VANY_NONZERO
, DL
, Op
->getValueType(0),
1633 case Intrinsic::mips_bsel_v
:
1634 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1635 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1636 Op
->getOperand(1), Op
->getOperand(3),
1638 case Intrinsic::mips_bseli_b
:
1639 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1640 return DAG
.getNode(ISD::VSELECT
, DL
, Op
->getValueType(0),
1641 Op
->getOperand(1), lowerMSASplatImm(Op
, 3, DAG
),
1643 case Intrinsic::mips_bset_b
:
1644 case Intrinsic::mips_bset_h
:
1645 case Intrinsic::mips_bset_w
:
1646 case Intrinsic::mips_bset_d
: {
1647 EVT VecTy
= Op
->getValueType(0);
1648 SDValue One
= DAG
.getConstant(1, VecTy
);
1650 return DAG
.getNode(ISD::OR
, DL
, VecTy
, Op
->getOperand(1),
1651 DAG
.getNode(ISD::SHL
, DL
, VecTy
, One
,
1652 Op
->getOperand(2)));
1654 case Intrinsic::mips_bseti_b
:
1655 case Intrinsic::mips_bseti_h
:
1656 case Intrinsic::mips_bseti_w
:
1657 case Intrinsic::mips_bseti_d
:
1658 return lowerMSABinaryBitImmIntr(Op
, DAG
, ISD::OR
, Op
->getOperand(2),
1659 !Subtarget
.isLittle());
1660 case Intrinsic::mips_bz_b
:
1661 case Intrinsic::mips_bz_h
:
1662 case Intrinsic::mips_bz_w
:
1663 case Intrinsic::mips_bz_d
:
1664 return DAG
.getNode(MipsISD::VALL_ZERO
, DL
, Op
->getValueType(0),
1666 case Intrinsic::mips_bz_v
:
1667 return DAG
.getNode(MipsISD::VANY_ZERO
, DL
, Op
->getValueType(0),
1669 case Intrinsic::mips_ceq_b
:
1670 case Intrinsic::mips_ceq_h
:
1671 case Intrinsic::mips_ceq_w
:
1672 case Intrinsic::mips_ceq_d
:
1673 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1674 Op
->getOperand(2), ISD::SETEQ
);
1675 case Intrinsic::mips_ceqi_b
:
1676 case Intrinsic::mips_ceqi_h
:
1677 case Intrinsic::mips_ceqi_w
:
1678 case Intrinsic::mips_ceqi_d
:
1679 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1680 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETEQ
);
1681 case Intrinsic::mips_cle_s_b
:
1682 case Intrinsic::mips_cle_s_h
:
1683 case Intrinsic::mips_cle_s_w
:
1684 case Intrinsic::mips_cle_s_d
:
1685 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1686 Op
->getOperand(2), ISD::SETLE
);
1687 case Intrinsic::mips_clei_s_b
:
1688 case Intrinsic::mips_clei_s_h
:
1689 case Intrinsic::mips_clei_s_w
:
1690 case Intrinsic::mips_clei_s_d
:
1691 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1692 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETLE
);
1693 case Intrinsic::mips_cle_u_b
:
1694 case Intrinsic::mips_cle_u_h
:
1695 case Intrinsic::mips_cle_u_w
:
1696 case Intrinsic::mips_cle_u_d
:
1697 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1698 Op
->getOperand(2), ISD::SETULE
);
1699 case Intrinsic::mips_clei_u_b
:
1700 case Intrinsic::mips_clei_u_h
:
1701 case Intrinsic::mips_clei_u_w
:
1702 case Intrinsic::mips_clei_u_d
:
1703 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1704 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULE
);
1705 case Intrinsic::mips_clt_s_b
:
1706 case Intrinsic::mips_clt_s_h
:
1707 case Intrinsic::mips_clt_s_w
:
1708 case Intrinsic::mips_clt_s_d
:
1709 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1710 Op
->getOperand(2), ISD::SETLT
);
1711 case Intrinsic::mips_clti_s_b
:
1712 case Intrinsic::mips_clti_s_h
:
1713 case Intrinsic::mips_clti_s_w
:
1714 case Intrinsic::mips_clti_s_d
:
1715 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1716 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETLT
);
1717 case Intrinsic::mips_clt_u_b
:
1718 case Intrinsic::mips_clt_u_h
:
1719 case Intrinsic::mips_clt_u_w
:
1720 case Intrinsic::mips_clt_u_d
:
1721 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1722 Op
->getOperand(2), ISD::SETULT
);
1723 case Intrinsic::mips_clti_u_b
:
1724 case Intrinsic::mips_clti_u_h
:
1725 case Intrinsic::mips_clti_u_w
:
1726 case Intrinsic::mips_clti_u_d
:
1727 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1728 lowerMSASplatImm(Op
, 2, DAG
), ISD::SETULT
);
1729 case Intrinsic::mips_copy_s_b
:
1730 case Intrinsic::mips_copy_s_h
:
1731 case Intrinsic::mips_copy_s_w
:
1732 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1733 case Intrinsic::mips_copy_s_d
:
1734 if (Subtarget
.hasMips64())
1735 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1736 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_SEXT_ELT
);
1738 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1739 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1740 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1741 Op
->getValueType(0), Op
->getOperand(1),
1744 case Intrinsic::mips_copy_u_b
:
1745 case Intrinsic::mips_copy_u_h
:
1746 case Intrinsic::mips_copy_u_w
:
1747 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1748 case Intrinsic::mips_copy_u_d
:
1749 if (Subtarget
.hasMips64())
1750 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1751 return lowerMSACopyIntr(Op
, DAG
, MipsISD::VEXTRACT_ZEXT_ELT
);
1753 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1754 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1755 // Note: When i64 is illegal, this results in copy_s.w instructions
1756 // instead of copy_u.w instructions. This makes no difference to the
1757 // behaviour since i64 is only illegal when the register file is 32-bit.
1758 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SDLoc(Op
),
1759 Op
->getValueType(0), Op
->getOperand(1),
1762 case Intrinsic::mips_div_s_b
:
1763 case Intrinsic::mips_div_s_h
:
1764 case Intrinsic::mips_div_s_w
:
1765 case Intrinsic::mips_div_s_d
:
1766 return DAG
.getNode(ISD::SDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1768 case Intrinsic::mips_div_u_b
:
1769 case Intrinsic::mips_div_u_h
:
1770 case Intrinsic::mips_div_u_w
:
1771 case Intrinsic::mips_div_u_d
:
1772 return DAG
.getNode(ISD::UDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1774 case Intrinsic::mips_fadd_w
:
1775 case Intrinsic::mips_fadd_d
:
1776 return DAG
.getNode(ISD::FADD
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1778 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1779 case Intrinsic::mips_fceq_w
:
1780 case Intrinsic::mips_fceq_d
:
1781 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1782 Op
->getOperand(2), ISD::SETOEQ
);
1783 case Intrinsic::mips_fcle_w
:
1784 case Intrinsic::mips_fcle_d
:
1785 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1786 Op
->getOperand(2), ISD::SETOLE
);
1787 case Intrinsic::mips_fclt_w
:
1788 case Intrinsic::mips_fclt_d
:
1789 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1790 Op
->getOperand(2), ISD::SETOLT
);
1791 case Intrinsic::mips_fcne_w
:
1792 case Intrinsic::mips_fcne_d
:
1793 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1794 Op
->getOperand(2), ISD::SETONE
);
1795 case Intrinsic::mips_fcor_w
:
1796 case Intrinsic::mips_fcor_d
:
1797 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1798 Op
->getOperand(2), ISD::SETO
);
1799 case Intrinsic::mips_fcueq_w
:
1800 case Intrinsic::mips_fcueq_d
:
1801 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1802 Op
->getOperand(2), ISD::SETUEQ
);
1803 case Intrinsic::mips_fcule_w
:
1804 case Intrinsic::mips_fcule_d
:
1805 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1806 Op
->getOperand(2), ISD::SETULE
);
1807 case Intrinsic::mips_fcult_w
:
1808 case Intrinsic::mips_fcult_d
:
1809 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1810 Op
->getOperand(2), ISD::SETULT
);
1811 case Intrinsic::mips_fcun_w
:
1812 case Intrinsic::mips_fcun_d
:
1813 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1814 Op
->getOperand(2), ISD::SETUO
);
1815 case Intrinsic::mips_fcune_w
:
1816 case Intrinsic::mips_fcune_d
:
1817 return DAG
.getSetCC(DL
, Op
->getValueType(0), Op
->getOperand(1),
1818 Op
->getOperand(2), ISD::SETUNE
);
1819 case Intrinsic::mips_fdiv_w
:
1820 case Intrinsic::mips_fdiv_d
:
1821 return DAG
.getNode(ISD::FDIV
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1823 case Intrinsic::mips_ffint_u_w
:
1824 case Intrinsic::mips_ffint_u_d
:
1825 return DAG
.getNode(ISD::UINT_TO_FP
, DL
, Op
->getValueType(0),
1827 case Intrinsic::mips_ffint_s_w
:
1828 case Intrinsic::mips_ffint_s_d
:
1829 return DAG
.getNode(ISD::SINT_TO_FP
, DL
, Op
->getValueType(0),
1831 case Intrinsic::mips_fill_b
:
1832 case Intrinsic::mips_fill_h
:
1833 case Intrinsic::mips_fill_w
:
1834 case Intrinsic::mips_fill_d
: {
1835 SmallVector
<SDValue
, 16> Ops
;
1836 EVT ResTy
= Op
->getValueType(0);
1838 for (unsigned i
= 0; i
< ResTy
.getVectorNumElements(); ++i
)
1839 Ops
.push_back(Op
->getOperand(1));
1841 // If ResTy is v2i64 then the type legalizer will break this node down into
1842 // an equivalent v4i32.
1843 return DAG
.getNode(ISD::BUILD_VECTOR
, DL
, ResTy
, Ops
);
1845 case Intrinsic::mips_fexp2_w
:
1846 case Intrinsic::mips_fexp2_d
: {
1847 EVT ResTy
= Op
->getValueType(0);
1849 ISD::FMUL
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1850 DAG
.getNode(ISD::FEXP2
, SDLoc(Op
), ResTy
, Op
->getOperand(2)));
1852 case Intrinsic::mips_flog2_w
:
1853 case Intrinsic::mips_flog2_d
:
1854 return DAG
.getNode(ISD::FLOG2
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1855 case Intrinsic::mips_fmadd_w
:
1856 case Intrinsic::mips_fmadd_d
:
1857 return DAG
.getNode(ISD::FMA
, SDLoc(Op
), Op
->getValueType(0),
1858 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
1859 case Intrinsic::mips_fmul_w
:
1860 case Intrinsic::mips_fmul_d
:
1861 return DAG
.getNode(ISD::FMUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1863 case Intrinsic::mips_fmsub_w
:
1864 case Intrinsic::mips_fmsub_d
: {
1865 EVT ResTy
= Op
->getValueType(0);
1866 return DAG
.getNode(ISD::FSUB
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1867 DAG
.getNode(ISD::FMUL
, SDLoc(Op
), ResTy
,
1868 Op
->getOperand(2), Op
->getOperand(3)));
1870 case Intrinsic::mips_frint_w
:
1871 case Intrinsic::mips_frint_d
:
1872 return DAG
.getNode(ISD::FRINT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1873 case Intrinsic::mips_fsqrt_w
:
1874 case Intrinsic::mips_fsqrt_d
:
1875 return DAG
.getNode(ISD::FSQRT
, DL
, Op
->getValueType(0), Op
->getOperand(1));
1876 case Intrinsic::mips_fsub_w
:
1877 case Intrinsic::mips_fsub_d
:
1878 return DAG
.getNode(ISD::FSUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
1880 case Intrinsic::mips_ftrunc_u_w
:
1881 case Intrinsic::mips_ftrunc_u_d
:
1882 return DAG
.getNode(ISD::FP_TO_UINT
, DL
, Op
->getValueType(0),
1884 case Intrinsic::mips_ftrunc_s_w
:
1885 case Intrinsic::mips_ftrunc_s_d
:
1886 return DAG
.getNode(ISD::FP_TO_SINT
, DL
, Op
->getValueType(0),
1888 case Intrinsic::mips_ilvev_b
:
1889 case Intrinsic::mips_ilvev_h
:
1890 case Intrinsic::mips_ilvev_w
:
1891 case Intrinsic::mips_ilvev_d
:
1892 return DAG
.getNode(MipsISD::ILVEV
, DL
, Op
->getValueType(0),
1893 Op
->getOperand(1), Op
->getOperand(2));
1894 case Intrinsic::mips_ilvl_b
:
1895 case Intrinsic::mips_ilvl_h
:
1896 case Intrinsic::mips_ilvl_w
:
1897 case Intrinsic::mips_ilvl_d
:
1898 return DAG
.getNode(MipsISD::ILVL
, DL
, Op
->getValueType(0),
1899 Op
->getOperand(1), Op
->getOperand(2));
1900 case Intrinsic::mips_ilvod_b
:
1901 case Intrinsic::mips_ilvod_h
:
1902 case Intrinsic::mips_ilvod_w
:
1903 case Intrinsic::mips_ilvod_d
:
1904 return DAG
.getNode(MipsISD::ILVOD
, DL
, Op
->getValueType(0),
1905 Op
->getOperand(1), Op
->getOperand(2));
1906 case Intrinsic::mips_ilvr_b
:
1907 case Intrinsic::mips_ilvr_h
:
1908 case Intrinsic::mips_ilvr_w
:
1909 case Intrinsic::mips_ilvr_d
:
1910 return DAG
.getNode(MipsISD::ILVR
, DL
, Op
->getValueType(0),
1911 Op
->getOperand(1), Op
->getOperand(2));
1912 case Intrinsic::mips_insert_b
:
1913 case Intrinsic::mips_insert_h
:
1914 case Intrinsic::mips_insert_w
:
1915 case Intrinsic::mips_insert_d
:
1916 return DAG
.getNode(ISD::INSERT_VECTOR_ELT
, SDLoc(Op
), Op
->getValueType(0),
1917 Op
->getOperand(1), Op
->getOperand(3), Op
->getOperand(2));
1918 case Intrinsic::mips_insve_b
:
1919 case Intrinsic::mips_insve_h
:
1920 case Intrinsic::mips_insve_w
:
1921 case Intrinsic::mips_insve_d
:
1922 return DAG
.getNode(MipsISD::INSVE
, DL
, Op
->getValueType(0),
1923 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3),
1924 DAG
.getConstant(0, MVT::i32
));
1925 case Intrinsic::mips_ldi_b
:
1926 case Intrinsic::mips_ldi_h
:
1927 case Intrinsic::mips_ldi_w
:
1928 case Intrinsic::mips_ldi_d
:
1929 return lowerMSASplatImm(Op
, 1, DAG
);
1930 case Intrinsic::mips_lsa
:
1931 case Intrinsic::mips_dlsa
: {
1932 EVT ResTy
= Op
->getValueType(0);
1933 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1934 DAG
.getNode(ISD::SHL
, SDLoc(Op
), ResTy
,
1935 Op
->getOperand(2), Op
->getOperand(3)));
1937 case Intrinsic::mips_maddv_b
:
1938 case Intrinsic::mips_maddv_h
:
1939 case Intrinsic::mips_maddv_w
:
1940 case Intrinsic::mips_maddv_d
: {
1941 EVT ResTy
= Op
->getValueType(0);
1942 return DAG
.getNode(ISD::ADD
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
1943 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
1944 Op
->getOperand(2), Op
->getOperand(3)));
1946 case Intrinsic::mips_max_s_b
:
1947 case Intrinsic::mips_max_s_h
:
1948 case Intrinsic::mips_max_s_w
:
1949 case Intrinsic::mips_max_s_d
:
1950 return DAG
.getNode(MipsISD::VSMAX
, DL
, Op
->getValueType(0),
1951 Op
->getOperand(1), Op
->getOperand(2));
1952 case Intrinsic::mips_max_u_b
:
1953 case Intrinsic::mips_max_u_h
:
1954 case Intrinsic::mips_max_u_w
:
1955 case Intrinsic::mips_max_u_d
:
1956 return DAG
.getNode(MipsISD::VUMAX
, DL
, Op
->getValueType(0),
1957 Op
->getOperand(1), Op
->getOperand(2));
1958 case Intrinsic::mips_maxi_s_b
:
1959 case Intrinsic::mips_maxi_s_h
:
1960 case Intrinsic::mips_maxi_s_w
:
1961 case Intrinsic::mips_maxi_s_d
:
1962 return DAG
.getNode(MipsISD::VSMAX
, DL
, Op
->getValueType(0),
1963 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
1964 case Intrinsic::mips_maxi_u_b
:
1965 case Intrinsic::mips_maxi_u_h
:
1966 case Intrinsic::mips_maxi_u_w
:
1967 case Intrinsic::mips_maxi_u_d
:
1968 return DAG
.getNode(MipsISD::VUMAX
, DL
, Op
->getValueType(0),
1969 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
1970 case Intrinsic::mips_min_s_b
:
1971 case Intrinsic::mips_min_s_h
:
1972 case Intrinsic::mips_min_s_w
:
1973 case Intrinsic::mips_min_s_d
:
1974 return DAG
.getNode(MipsISD::VSMIN
, DL
, Op
->getValueType(0),
1975 Op
->getOperand(1), Op
->getOperand(2));
1976 case Intrinsic::mips_min_u_b
:
1977 case Intrinsic::mips_min_u_h
:
1978 case Intrinsic::mips_min_u_w
:
1979 case Intrinsic::mips_min_u_d
:
1980 return DAG
.getNode(MipsISD::VUMIN
, DL
, Op
->getValueType(0),
1981 Op
->getOperand(1), Op
->getOperand(2));
1982 case Intrinsic::mips_mini_s_b
:
1983 case Intrinsic::mips_mini_s_h
:
1984 case Intrinsic::mips_mini_s_w
:
1985 case Intrinsic::mips_mini_s_d
:
1986 return DAG
.getNode(MipsISD::VSMIN
, DL
, Op
->getValueType(0),
1987 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
1988 case Intrinsic::mips_mini_u_b
:
1989 case Intrinsic::mips_mini_u_h
:
1990 case Intrinsic::mips_mini_u_w
:
1991 case Intrinsic::mips_mini_u_d
:
1992 return DAG
.getNode(MipsISD::VUMIN
, DL
, Op
->getValueType(0),
1993 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
1994 case Intrinsic::mips_mod_s_b
:
1995 case Intrinsic::mips_mod_s_h
:
1996 case Intrinsic::mips_mod_s_w
:
1997 case Intrinsic::mips_mod_s_d
:
1998 return DAG
.getNode(ISD::SREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2000 case Intrinsic::mips_mod_u_b
:
2001 case Intrinsic::mips_mod_u_h
:
2002 case Intrinsic::mips_mod_u_w
:
2003 case Intrinsic::mips_mod_u_d
:
2004 return DAG
.getNode(ISD::UREM
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2006 case Intrinsic::mips_mulv_b
:
2007 case Intrinsic::mips_mulv_h
:
2008 case Intrinsic::mips_mulv_w
:
2009 case Intrinsic::mips_mulv_d
:
2010 return DAG
.getNode(ISD::MUL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2012 case Intrinsic::mips_msubv_b
:
2013 case Intrinsic::mips_msubv_h
:
2014 case Intrinsic::mips_msubv_w
:
2015 case Intrinsic::mips_msubv_d
: {
2016 EVT ResTy
= Op
->getValueType(0);
2017 return DAG
.getNode(ISD::SUB
, SDLoc(Op
), ResTy
, Op
->getOperand(1),
2018 DAG
.getNode(ISD::MUL
, SDLoc(Op
), ResTy
,
2019 Op
->getOperand(2), Op
->getOperand(3)));
2021 case Intrinsic::mips_nlzc_b
:
2022 case Intrinsic::mips_nlzc_h
:
2023 case Intrinsic::mips_nlzc_w
:
2024 case Intrinsic::mips_nlzc_d
:
2025 return DAG
.getNode(ISD::CTLZ
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2026 case Intrinsic::mips_nor_v
: {
2027 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2028 Op
->getOperand(1), Op
->getOperand(2));
2029 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2031 case Intrinsic::mips_nori_b
: {
2032 SDValue Res
= DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2034 lowerMSASplatImm(Op
, 2, DAG
));
2035 return DAG
.getNOT(DL
, Res
, Res
->getValueType(0));
2037 case Intrinsic::mips_or_v
:
2038 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2040 case Intrinsic::mips_ori_b
:
2041 return DAG
.getNode(ISD::OR
, DL
, Op
->getValueType(0),
2042 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2043 case Intrinsic::mips_pckev_b
:
2044 case Intrinsic::mips_pckev_h
:
2045 case Intrinsic::mips_pckev_w
:
2046 case Intrinsic::mips_pckev_d
:
2047 return DAG
.getNode(MipsISD::PCKEV
, DL
, Op
->getValueType(0),
2048 Op
->getOperand(1), Op
->getOperand(2));
2049 case Intrinsic::mips_pckod_b
:
2050 case Intrinsic::mips_pckod_h
:
2051 case Intrinsic::mips_pckod_w
:
2052 case Intrinsic::mips_pckod_d
:
2053 return DAG
.getNode(MipsISD::PCKOD
, DL
, Op
->getValueType(0),
2054 Op
->getOperand(1), Op
->getOperand(2));
2055 case Intrinsic::mips_pcnt_b
:
2056 case Intrinsic::mips_pcnt_h
:
2057 case Intrinsic::mips_pcnt_w
:
2058 case Intrinsic::mips_pcnt_d
:
2059 return DAG
.getNode(ISD::CTPOP
, DL
, Op
->getValueType(0), Op
->getOperand(1));
2060 case Intrinsic::mips_shf_b
:
2061 case Intrinsic::mips_shf_h
:
2062 case Intrinsic::mips_shf_w
:
2063 return DAG
.getNode(MipsISD::SHF
, DL
, Op
->getValueType(0),
2064 Op
->getOperand(2), Op
->getOperand(1));
2065 case Intrinsic::mips_sll_b
:
2066 case Intrinsic::mips_sll_h
:
2067 case Intrinsic::mips_sll_w
:
2068 case Intrinsic::mips_sll_d
:
2069 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2071 case Intrinsic::mips_slli_b
:
2072 case Intrinsic::mips_slli_h
:
2073 case Intrinsic::mips_slli_w
:
2074 case Intrinsic::mips_slli_d
:
2075 return DAG
.getNode(ISD::SHL
, DL
, Op
->getValueType(0),
2076 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2077 case Intrinsic::mips_splat_b
:
2078 case Intrinsic::mips_splat_h
:
2079 case Intrinsic::mips_splat_w
:
2080 case Intrinsic::mips_splat_d
:
2081 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2082 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2083 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2084 // Instead we lower to MipsISD::VSHF and match from there.
2085 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2086 lowerMSASplatZExt(Op
, 2, DAG
), Op
->getOperand(1),
2088 case Intrinsic::mips_splati_b
:
2089 case Intrinsic::mips_splati_h
:
2090 case Intrinsic::mips_splati_w
:
2091 case Intrinsic::mips_splati_d
:
2092 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2093 lowerMSASplatImm(Op
, 2, DAG
), Op
->getOperand(1),
2095 case Intrinsic::mips_sra_b
:
2096 case Intrinsic::mips_sra_h
:
2097 case Intrinsic::mips_sra_w
:
2098 case Intrinsic::mips_sra_d
:
2099 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2101 case Intrinsic::mips_srai_b
:
2102 case Intrinsic::mips_srai_h
:
2103 case Intrinsic::mips_srai_w
:
2104 case Intrinsic::mips_srai_d
:
2105 return DAG
.getNode(ISD::SRA
, DL
, Op
->getValueType(0),
2106 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2107 case Intrinsic::mips_srl_b
:
2108 case Intrinsic::mips_srl_h
:
2109 case Intrinsic::mips_srl_w
:
2110 case Intrinsic::mips_srl_d
:
2111 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2113 case Intrinsic::mips_srli_b
:
2114 case Intrinsic::mips_srli_h
:
2115 case Intrinsic::mips_srli_w
:
2116 case Intrinsic::mips_srli_d
:
2117 return DAG
.getNode(ISD::SRL
, DL
, Op
->getValueType(0),
2118 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2119 case Intrinsic::mips_subv_b
:
2120 case Intrinsic::mips_subv_h
:
2121 case Intrinsic::mips_subv_w
:
2122 case Intrinsic::mips_subv_d
:
2123 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2125 case Intrinsic::mips_subvi_b
:
2126 case Intrinsic::mips_subvi_h
:
2127 case Intrinsic::mips_subvi_w
:
2128 case Intrinsic::mips_subvi_d
:
2129 return DAG
.getNode(ISD::SUB
, DL
, Op
->getValueType(0),
2130 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2131 case Intrinsic::mips_vshf_b
:
2132 case Intrinsic::mips_vshf_h
:
2133 case Intrinsic::mips_vshf_w
:
2134 case Intrinsic::mips_vshf_d
:
2135 return DAG
.getNode(MipsISD::VSHF
, DL
, Op
->getValueType(0),
2136 Op
->getOperand(1), Op
->getOperand(2), Op
->getOperand(3));
2137 case Intrinsic::mips_xor_v
:
2138 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0), Op
->getOperand(1),
2140 case Intrinsic::mips_xori_b
:
2141 return DAG
.getNode(ISD::XOR
, DL
, Op
->getValueType(0),
2142 Op
->getOperand(1), lowerMSASplatImm(Op
, 2, DAG
));
2146 static SDValue
lowerMSALoadIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Intr
) {
2148 SDValue ChainIn
= Op
->getOperand(0);
2149 SDValue Address
= Op
->getOperand(2);
2150 SDValue Offset
= Op
->getOperand(3);
2151 EVT ResTy
= Op
->getValueType(0);
2152 EVT PtrTy
= Address
->getValueType(0);
2154 Address
= DAG
.getNode(ISD::ADD
, DL
, PtrTy
, Address
, Offset
);
2156 return DAG
.getLoad(ResTy
, DL
, ChainIn
, Address
, MachinePointerInfo(), false,
2160 SDValue
MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op
,
2161 SelectionDAG
&DAG
) const {
2162 unsigned Intr
= cast
<ConstantSDNode
>(Op
->getOperand(1))->getZExtValue();
2166 case Intrinsic::mips_extp
:
2167 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTP
);
2168 case Intrinsic::mips_extpdp
:
2169 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTPDP
);
2170 case Intrinsic::mips_extr_w
:
2171 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_W
);
2172 case Intrinsic::mips_extr_r_w
:
2173 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_R_W
);
2174 case Intrinsic::mips_extr_rs_w
:
2175 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_RS_W
);
2176 case Intrinsic::mips_extr_s_h
:
2177 return lowerDSPIntr(Op
, DAG
, MipsISD::EXTR_S_H
);
2178 case Intrinsic::mips_mthlip
:
2179 return lowerDSPIntr(Op
, DAG
, MipsISD::MTHLIP
);
2180 case Intrinsic::mips_mulsaq_s_w_ph
:
2181 return lowerDSPIntr(Op
, DAG
, MipsISD::MULSAQ_S_W_PH
);
2182 case Intrinsic::mips_maq_s_w_phl
:
2183 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_S_W_PHL
);
2184 case Intrinsic::mips_maq_s_w_phr
:
2185 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_S_W_PHR
);
2186 case Intrinsic::mips_maq_sa_w_phl
:
2187 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_SA_W_PHL
);
2188 case Intrinsic::mips_maq_sa_w_phr
:
2189 return lowerDSPIntr(Op
, DAG
, MipsISD::MAQ_SA_W_PHR
);
2190 case Intrinsic::mips_dpaq_s_w_ph
:
2191 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQ_S_W_PH
);
2192 case Intrinsic::mips_dpsq_s_w_ph
:
2193 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQ_S_W_PH
);
2194 case Intrinsic::mips_dpaq_sa_l_w
:
2195 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQ_SA_L_W
);
2196 case Intrinsic::mips_dpsq_sa_l_w
:
2197 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQ_SA_L_W
);
2198 case Intrinsic::mips_dpaqx_s_w_ph
:
2199 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQX_S_W_PH
);
2200 case Intrinsic::mips_dpaqx_sa_w_ph
:
2201 return lowerDSPIntr(Op
, DAG
, MipsISD::DPAQX_SA_W_PH
);
2202 case Intrinsic::mips_dpsqx_s_w_ph
:
2203 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQX_S_W_PH
);
2204 case Intrinsic::mips_dpsqx_sa_w_ph
:
2205 return lowerDSPIntr(Op
, DAG
, MipsISD::DPSQX_SA_W_PH
);
2206 case Intrinsic::mips_ld_b
:
2207 case Intrinsic::mips_ld_h
:
2208 case Intrinsic::mips_ld_w
:
2209 case Intrinsic::mips_ld_d
:
2210 return lowerMSALoadIntr(Op
, DAG
, Intr
);
2214 static SDValue
lowerMSAStoreIntr(SDValue Op
, SelectionDAG
&DAG
, unsigned Intr
) {
2216 SDValue ChainIn
= Op
->getOperand(0);
2217 SDValue Value
= Op
->getOperand(2);
2218 SDValue Address
= Op
->getOperand(3);
2219 SDValue Offset
= Op
->getOperand(4);
2220 EVT PtrTy
= Address
->getValueType(0);
2222 Address
= DAG
.getNode(ISD::ADD
, DL
, PtrTy
, Address
, Offset
);
2224 return DAG
.getStore(ChainIn
, DL
, Value
, Address
, MachinePointerInfo(), false,
2228 SDValue
MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op
,
2229 SelectionDAG
&DAG
) const {
2230 unsigned Intr
= cast
<ConstantSDNode
>(Op
->getOperand(1))->getZExtValue();
2234 case Intrinsic::mips_st_b
:
2235 case Intrinsic::mips_st_h
:
2236 case Intrinsic::mips_st_w
:
2237 case Intrinsic::mips_st_d
:
2238 return lowerMSAStoreIntr(Op
, DAG
, Intr
);
2242 /// \brief Check if the given BuildVectorSDNode is a splat.
2243 /// This method currently relies on DAG nodes being reused when equivalent,
2244 /// so it's possible for this to return false even when isConstantSplat returns
2246 static bool isSplatVector(const BuildVectorSDNode
*N
) {
2247 unsigned int nOps
= N
->getNumOperands();
2248 assert(nOps
> 1 && "isSplatVector has 0 or 1 sized build vector");
2250 SDValue Operand0
= N
->getOperand(0);
2252 for (unsigned int i
= 1; i
< nOps
; ++i
) {
2253 if (N
->getOperand(i
) != Operand0
)
2260 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2262 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2263 // choose to sign-extend but we could have equally chosen zero-extend. The
2264 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2265 // result into this node later (possibly changing it to a zero-extend in the
2267 SDValue
MipsSETargetLowering::
2268 lowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const {
2270 EVT ResTy
= Op
->getValueType(0);
2271 SDValue Op0
= Op
->getOperand(0);
2272 EVT VecTy
= Op0
->getValueType(0);
2274 if (!VecTy
.is128BitVector())
2277 if (ResTy
.isInteger()) {
2278 SDValue Op1
= Op
->getOperand(1);
2279 EVT EltTy
= VecTy
.getVectorElementType();
2280 return DAG
.getNode(MipsISD::VEXTRACT_SEXT_ELT
, DL
, ResTy
, Op0
, Op1
,
2281 DAG
.getValueType(EltTy
));
2287 static bool isConstantOrUndef(const SDValue Op
) {
2288 if (Op
->getOpcode() == ISD::UNDEF
)
2290 if (dyn_cast
<ConstantSDNode
>(Op
))
2292 if (dyn_cast
<ConstantFPSDNode
>(Op
))
2297 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode
*Op
) {
2298 for (unsigned i
= 0; i
< Op
->getNumOperands(); ++i
)
2299 if (isConstantOrUndef(Op
->getOperand(i
)))
2304 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2307 // Lowers according to the following rules:
2308 // - Constant splats are legal as-is as long as the SplatBitSize is a power of
2309 // 2 less than or equal to 64 and the value fits into a signed 10-bit
2311 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2312 // is a power of 2 less than or equal to 64 and the value does not fit into a
2313 // signed 10-bit immediate
2314 // - Non-constant splats are legal as-is.
2315 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2316 // - All others are illegal and must be expanded.
2317 SDValue
MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op
,
2318 SelectionDAG
&DAG
) const {
2319 BuildVectorSDNode
*Node
= cast
<BuildVectorSDNode
>(Op
);
2320 EVT ResTy
= Op
->getValueType(0);
2322 APInt SplatValue
, SplatUndef
;
2323 unsigned SplatBitSize
;
2326 if (!Subtarget
.hasMSA() || !ResTy
.is128BitVector())
2329 if (Node
->isConstantSplat(SplatValue
, SplatUndef
, SplatBitSize
,
2331 !Subtarget
.isLittle()) && SplatBitSize
<= 64) {
2332 // We can only cope with 8, 16, 32, or 64-bit elements
2333 if (SplatBitSize
!= 8 && SplatBitSize
!= 16 && SplatBitSize
!= 32 &&
2337 // If the value fits into a simm10 then we can use ldi.[bhwd]
2338 // However, if it isn't an integer type we will have to bitcast from an
2339 // integer type first. Also, if there are any undefs, we must lower them
2340 // to defined values first.
2341 if (ResTy
.isInteger() && !HasAnyUndefs
&& SplatValue
.isSignedIntN(10))
2346 switch (SplatBitSize
) {
2350 ViaVecTy
= MVT::v16i8
;
2353 ViaVecTy
= MVT::v8i16
;
2356 ViaVecTy
= MVT::v4i32
;
2359 // There's no fill.d to fall back on for 64-bit values
2363 // SelectionDAG::getConstant will promote SplatValue appropriately.
2364 SDValue Result
= DAG
.getConstant(SplatValue
, ViaVecTy
);
2366 // Bitcast to the type we originally wanted
2367 if (ViaVecTy
!= ResTy
)
2368 Result
= DAG
.getNode(ISD::BITCAST
, SDLoc(Node
), ResTy
, Result
);
2371 } else if (isSplatVector(Node
))
2373 else if (!isConstantOrUndefBUILD_VECTOR(Node
)) {
2374 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2375 // The resulting code is the same length as the expansion, but it doesn't
2376 // use memory operations
2377 EVT ResTy
= Node
->getValueType(0);
2379 assert(ResTy
.isVector());
2381 unsigned NumElts
= ResTy
.getVectorNumElements();
2382 SDValue Vector
= DAG
.getUNDEF(ResTy
);
2383 for (unsigned i
= 0; i
< NumElts
; ++i
) {
2384 Vector
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, ResTy
, Vector
,
2385 Node
->getOperand(i
),
2386 DAG
.getConstant(i
, MVT::i32
));
2394 // Lower VECTOR_SHUFFLE into SHF (if possible).
2396 // SHF splits the vector into blocks of four elements, then shuffles these
2397 // elements according to a <4 x i2> constant (encoded as an integer immediate).
2399 // It is therefore possible to lower into SHF when the mask takes the form:
2400 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2401 // When undef's appear they are treated as if they were whatever value is
2402 // necessary in order to fit the above form.
2405 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2406 // <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2407 // i32 7, i32 6, i32 5, i32 4>
2409 // (SHF_H $w0, $w1, 27)
2410 // where the 27 comes from:
2411 // 3 + (2 << 2) + (1 << 4) + (0 << 6)
2412 static SDValue
lowerVECTOR_SHUFFLE_SHF(SDValue Op
, EVT ResTy
,
2413 SmallVector
<int, 16> Indices
,
2414 SelectionDAG
&DAG
) {
2415 int SHFIndices
[4] = { -1, -1, -1, -1 };
2417 if (Indices
.size() < 4)
2420 for (unsigned i
= 0; i
< 4; ++i
) {
2421 for (unsigned j
= i
; j
< Indices
.size(); j
+= 4) {
2422 int Idx
= Indices
[j
];
2424 // Convert from vector index to 4-element subvector index
2425 // If an index refers to an element outside of the subvector then give up
2428 if (Idx
< 0 || Idx
>= 4)
2432 // If the mask has an undef, replace it with the current index.
2433 // Note that it might still be undef if the current index is also undef
2434 if (SHFIndices
[i
] == -1)
2435 SHFIndices
[i
] = Idx
;
2437 // Check that non-undef values are the same as in the mask. If they
2438 // aren't then give up
2439 if (!(Idx
== -1 || Idx
== SHFIndices
[i
]))
2444 // Calculate the immediate. Replace any remaining undefs with zero
2446 for (int i
= 3; i
>= 0; --i
) {
2447 int Idx
= SHFIndices
[i
];
2456 return DAG
.getNode(MipsISD::SHF
, SDLoc(Op
), ResTy
,
2457 DAG
.getConstant(Imm
, MVT::i32
), Op
->getOperand(0));
2460 // Lower VECTOR_SHUFFLE into ILVEV (if possible).
2462 // ILVEV interleaves the even elements from each vector.
2464 // It is possible to lower into ILVEV when the mask takes the form:
2465 // <0, n, 2, n+2, 4, n+4, ...>
2466 // where n is the number of elements in the vector.
2468 // When undef's appear in the mask they are treated as if they were whatever
2469 // value is necessary in order to fit the above form.
2470 static SDValue
lowerVECTOR_SHUFFLE_ILVEV(SDValue Op
, EVT ResTy
,
2471 SmallVector
<int, 16> Indices
,
2472 SelectionDAG
&DAG
) {
2473 assert ((Indices
.size() % 2) == 0);
2475 int WtIdx
= ResTy
.getVectorNumElements();
2477 for (unsigned i
= 0; i
< Indices
.size(); i
+= 2) {
2478 if (Indices
[i
] != -1 && Indices
[i
] != WsIdx
)
2480 if (Indices
[i
+1] != -1 && Indices
[i
+1] != WtIdx
)
2486 return DAG
.getNode(MipsISD::ILVEV
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2490 // Lower VECTOR_SHUFFLE into ILVOD (if possible).
2492 // ILVOD interleaves the odd elements from each vector.
2494 // It is possible to lower into ILVOD when the mask takes the form:
2495 // <1, n+1, 3, n+3, 5, n+5, ...>
2496 // where n is the number of elements in the vector.
2498 // When undef's appear in the mask they are treated as if they were whatever
2499 // value is necessary in order to fit the above form.
2500 static SDValue
lowerVECTOR_SHUFFLE_ILVOD(SDValue Op
, EVT ResTy
,
2501 SmallVector
<int, 16> Indices
,
2502 SelectionDAG
&DAG
) {
2503 assert ((Indices
.size() % 2) == 0);
2505 int WtIdx
= ResTy
.getVectorNumElements() + 1;
2507 for (unsigned i
= 0; i
< Indices
.size(); i
+= 2) {
2508 if (Indices
[i
] != -1 && Indices
[i
] != WsIdx
)
2510 if (Indices
[i
+1] != -1 && Indices
[i
+1] != WtIdx
)
2516 return DAG
.getNode(MipsISD::ILVOD
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2520 // Lower VECTOR_SHUFFLE into ILVL (if possible).
2522 // ILVL interleaves consecutive elements from the left half of each vector.
2524 // It is possible to lower into ILVL when the mask takes the form:
2525 // <0, n, 1, n+1, 2, n+2, ...>
2526 // where n is the number of elements in the vector.
2528 // When undef's appear in the mask they are treated as if they were whatever
2529 // value is necessary in order to fit the above form.
2530 static SDValue
lowerVECTOR_SHUFFLE_ILVL(SDValue Op
, EVT ResTy
,
2531 SmallVector
<int, 16> Indices
,
2532 SelectionDAG
&DAG
) {
2533 assert ((Indices
.size() % 2) == 0);
2535 int WtIdx
= ResTy
.getVectorNumElements();
2537 for (unsigned i
= 0; i
< Indices
.size(); i
+= 2) {
2538 if (Indices
[i
] != -1 && Indices
[i
] != WsIdx
)
2540 if (Indices
[i
+1] != -1 && Indices
[i
+1] != WtIdx
)
2546 return DAG
.getNode(MipsISD::ILVL
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2550 // Lower VECTOR_SHUFFLE into ILVR (if possible).
2552 // ILVR interleaves consecutive elements from the right half of each vector.
2554 // It is possible to lower into ILVR when the mask takes the form:
2555 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2556 // where n is the number of elements in the vector and x is half n.
2558 // When undef's appear in the mask they are treated as if they were whatever
2559 // value is necessary in order to fit the above form.
2560 static SDValue
lowerVECTOR_SHUFFLE_ILVR(SDValue Op
, EVT ResTy
,
2561 SmallVector
<int, 16> Indices
,
2562 SelectionDAG
&DAG
) {
2563 assert ((Indices
.size() % 2) == 0);
2564 unsigned NumElts
= ResTy
.getVectorNumElements();
2565 int WsIdx
= NumElts
/ 2;
2566 int WtIdx
= NumElts
+ NumElts
/ 2;
2568 for (unsigned i
= 0; i
< Indices
.size(); i
+= 2) {
2569 if (Indices
[i
] != -1 && Indices
[i
] != WsIdx
)
2571 if (Indices
[i
+1] != -1 && Indices
[i
+1] != WtIdx
)
2577 return DAG
.getNode(MipsISD::ILVR
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2581 // Lower VECTOR_SHUFFLE into PCKEV (if possible).
2583 // PCKEV copies the even elements of each vector into the result vector.
2585 // It is possible to lower into PCKEV when the mask takes the form:
2586 // <0, 2, 4, ..., n, n+2, n+4, ...>
2587 // where n is the number of elements in the vector.
2589 // When undef's appear in the mask they are treated as if they were whatever
2590 // value is necessary in order to fit the above form.
2591 static SDValue
lowerVECTOR_SHUFFLE_PCKEV(SDValue Op
, EVT ResTy
,
2592 SmallVector
<int, 16> Indices
,
2593 SelectionDAG
&DAG
) {
2594 assert ((Indices
.size() % 2) == 0);
2597 for (unsigned i
= 0; i
< Indices
.size(); ++i
) {
2598 if (Indices
[i
] != -1 && Indices
[i
] != Idx
)
2603 return DAG
.getNode(MipsISD::PCKEV
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2607 // Lower VECTOR_SHUFFLE into PCKOD (if possible).
2609 // PCKOD copies the odd elements of each vector into the result vector.
2611 // It is possible to lower into PCKOD when the mask takes the form:
2612 // <1, 3, 5, ..., n+1, n+3, n+5, ...>
2613 // where n is the number of elements in the vector.
2615 // When undef's appear in the mask they are treated as if they were whatever
2616 // value is necessary in order to fit the above form.
2617 static SDValue
lowerVECTOR_SHUFFLE_PCKOD(SDValue Op
, EVT ResTy
,
2618 SmallVector
<int, 16> Indices
,
2619 SelectionDAG
&DAG
) {
2620 assert ((Indices
.size() % 2) == 0);
2623 for (unsigned i
= 0; i
< Indices
.size(); ++i
) {
2624 if (Indices
[i
] != -1 && Indices
[i
] != Idx
)
2629 return DAG
.getNode(MipsISD::PCKOD
, SDLoc(Op
), ResTy
, Op
->getOperand(0),
2633 // Lower VECTOR_SHUFFLE into VSHF.
2635 // This mostly consists of converting the shuffle indices in Indices into a
2636 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2637 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2638 // if the type is v8i16 and all the indices are less than 8 then the second
2639 // operand is unused and can be replaced with anything. We choose to replace it
2640 // with the used operand since this reduces the number of instructions overall.
2641 static SDValue
lowerVECTOR_SHUFFLE_VSHF(SDValue Op
, EVT ResTy
,
2642 SmallVector
<int, 16> Indices
,
2643 SelectionDAG
&DAG
) {
2644 SmallVector
<SDValue
, 16> Ops
;
2647 EVT MaskVecTy
= ResTy
.changeVectorElementTypeToInteger();
2648 EVT MaskEltTy
= MaskVecTy
.getVectorElementType();
2649 bool Using1stVec
= false;
2650 bool Using2ndVec
= false;
2652 int ResTyNumElts
= ResTy
.getVectorNumElements();
2654 for (int i
= 0; i
< ResTyNumElts
; ++i
) {
2655 // Idx == -1 means UNDEF
2656 int Idx
= Indices
[i
];
2658 if (0 <= Idx
&& Idx
< ResTyNumElts
)
2660 if (ResTyNumElts
<= Idx
&& Idx
< ResTyNumElts
* 2)
2664 for (SmallVector
<int, 16>::iterator I
= Indices
.begin(); I
!= Indices
.end();
2666 Ops
.push_back(DAG
.getTargetConstant(*I
, MaskEltTy
));
2668 SDValue MaskVec
= DAG
.getNode(ISD::BUILD_VECTOR
, DL
, MaskVecTy
, Ops
);
2670 if (Using1stVec
&& Using2ndVec
) {
2671 Op0
= Op
->getOperand(0);
2672 Op1
= Op
->getOperand(1);
2673 } else if (Using1stVec
)
2674 Op0
= Op1
= Op
->getOperand(0);
2675 else if (Using2ndVec
)
2676 Op0
= Op1
= Op
->getOperand(1);
2678 llvm_unreachable("shuffle vector mask references neither vector operand?");
2680 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2681 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2682 // VSHF concatenates the vectors in a bitwise fashion:
2683 // <0b00, 0b01> + <0b10, 0b11> ->
2684 // 0b0100 + 0b1110 -> 0b01001110
2685 // <0b10, 0b11, 0b00, 0b01>
2686 // We must therefore swap the operands to get the correct result.
2687 return DAG
.getNode(MipsISD::VSHF
, DL
, ResTy
, MaskVec
, Op1
, Op0
);
2690 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2691 // indices in the shuffle.
2692 SDValue
MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op
,
2693 SelectionDAG
&DAG
) const {
2694 ShuffleVectorSDNode
*Node
= cast
<ShuffleVectorSDNode
>(Op
);
2695 EVT ResTy
= Op
->getValueType(0);
2697 if (!ResTy
.is128BitVector())
2700 int ResTyNumElts
= ResTy
.getVectorNumElements();
2701 SmallVector
<int, 16> Indices
;
2703 for (int i
= 0; i
< ResTyNumElts
; ++i
)
2704 Indices
.push_back(Node
->getMaskElt(i
));
2706 SDValue Result
= lowerVECTOR_SHUFFLE_SHF(Op
, ResTy
, Indices
, DAG
);
2707 if (Result
.getNode())
2709 Result
= lowerVECTOR_SHUFFLE_ILVEV(Op
, ResTy
, Indices
, DAG
);
2710 if (Result
.getNode())
2712 Result
= lowerVECTOR_SHUFFLE_ILVOD(Op
, ResTy
, Indices
, DAG
);
2713 if (Result
.getNode())
2715 Result
= lowerVECTOR_SHUFFLE_ILVL(Op
, ResTy
, Indices
, DAG
);
2716 if (Result
.getNode())
2718 Result
= lowerVECTOR_SHUFFLE_ILVR(Op
, ResTy
, Indices
, DAG
);
2719 if (Result
.getNode())
2721 Result
= lowerVECTOR_SHUFFLE_PCKEV(Op
, ResTy
, Indices
, DAG
);
2722 if (Result
.getNode())
2724 Result
= lowerVECTOR_SHUFFLE_PCKOD(Op
, ResTy
, Indices
, DAG
);
2725 if (Result
.getNode())
2727 return lowerVECTOR_SHUFFLE_VSHF(Op
, ResTy
, Indices
, DAG
);
2730 MachineBasicBlock
* MipsSETargetLowering::
2731 emitBPOSGE32(MachineInstr
*MI
, MachineBasicBlock
*BB
) const{
2733 // bposge32_pseudo $vr0
2743 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
2745 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2746 const TargetInstrInfo
*TII
=
2747 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2748 const TargetRegisterClass
*RC
= &Mips::GPR32RegClass
;
2749 DebugLoc DL
= MI
->getDebugLoc();
2750 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
2751 MachineFunction::iterator It
= std::next(MachineFunction::iterator(BB
));
2752 MachineFunction
*F
= BB
->getParent();
2753 MachineBasicBlock
*FBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2754 MachineBasicBlock
*TBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2755 MachineBasicBlock
*Sink
= F
->CreateMachineBasicBlock(LLVM_BB
);
2758 F
->insert(It
, Sink
);
2760 // Transfer the remainder of BB and its successor edges to Sink.
2761 Sink
->splice(Sink
->begin(), BB
, std::next(MachineBasicBlock::iterator(MI
)),
2763 Sink
->transferSuccessorsAndUpdatePHIs(BB
);
2766 BB
->addSuccessor(FBB
);
2767 BB
->addSuccessor(TBB
);
2768 FBB
->addSuccessor(Sink
);
2769 TBB
->addSuccessor(Sink
);
2771 // Insert the real bposge32 instruction to $BB.
2772 BuildMI(BB
, DL
, TII
->get(Mips::BPOSGE32
)).addMBB(TBB
);
2775 unsigned VR2
= RegInfo
.createVirtualRegister(RC
);
2776 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::ADDiu
), VR2
)
2777 .addReg(Mips::ZERO
).addImm(0);
2778 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::B
)).addMBB(Sink
);
2781 unsigned VR1
= RegInfo
.createVirtualRegister(RC
);
2782 BuildMI(*TBB
, TBB
->end(), DL
, TII
->get(Mips::ADDiu
), VR1
)
2783 .addReg(Mips::ZERO
).addImm(1);
2785 // Insert phi function to $Sink.
2786 BuildMI(*Sink
, Sink
->begin(), DL
, TII
->get(Mips::PHI
),
2787 MI
->getOperand(0).getReg())
2788 .addReg(VR2
).addMBB(FBB
).addReg(VR1
).addMBB(TBB
);
2790 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2794 MachineBasicBlock
* MipsSETargetLowering::
2795 emitMSACBranchPseudo(MachineInstr
*MI
, MachineBasicBlock
*BB
,
2796 unsigned BranchOp
) const{
2798 // vany_nonzero $rd, $ws
2809 // $rd = phi($rd1, $fbb, $rd2, $tbb)
2811 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2812 const TargetInstrInfo
*TII
=
2813 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2814 const TargetRegisterClass
*RC
= &Mips::GPR32RegClass
;
2815 DebugLoc DL
= MI
->getDebugLoc();
2816 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
2817 MachineFunction::iterator It
= std::next(MachineFunction::iterator(BB
));
2818 MachineFunction
*F
= BB
->getParent();
2819 MachineBasicBlock
*FBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2820 MachineBasicBlock
*TBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
2821 MachineBasicBlock
*Sink
= F
->CreateMachineBasicBlock(LLVM_BB
);
2824 F
->insert(It
, Sink
);
2826 // Transfer the remainder of BB and its successor edges to Sink.
2827 Sink
->splice(Sink
->begin(), BB
, std::next(MachineBasicBlock::iterator(MI
)),
2829 Sink
->transferSuccessorsAndUpdatePHIs(BB
);
2832 BB
->addSuccessor(FBB
);
2833 BB
->addSuccessor(TBB
);
2834 FBB
->addSuccessor(Sink
);
2835 TBB
->addSuccessor(Sink
);
2837 // Insert the real branch instruction (BranchOp, e.g. bnz.b) to $BB.
2838 BuildMI(BB
, DL
, TII
->get(BranchOp
))
2839 .addReg(MI
->getOperand(1).getReg())
2843 unsigned RD1
= RegInfo
.createVirtualRegister(RC
);
2844 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::ADDiu
), RD1
)
2845 .addReg(Mips::ZERO
).addImm(0);
2846 BuildMI(*FBB
, FBB
->end(), DL
, TII
->get(Mips::B
)).addMBB(Sink
);
2849 unsigned RD2
= RegInfo
.createVirtualRegister(RC
);
2850 BuildMI(*TBB
, TBB
->end(), DL
, TII
->get(Mips::ADDiu
), RD2
)
2851 .addReg(Mips::ZERO
).addImm(1);
2853 // Insert phi function to $Sink.
2854 BuildMI(*Sink
, Sink
->begin(), DL
, TII
->get(Mips::PHI
),
2855 MI
->getOperand(0).getReg())
2856 .addReg(RD1
).addMBB(FBB
).addReg(RD2
).addMBB(TBB
);
2858 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2862 // Emit the COPY_FW pseudo instruction.
2864 // copy_fw_pseudo $fd, $ws, n
2866 // splati.w $wt, $ws, $n, then copy $fd, $wt:sub_lo
2869 // When n is zero, the equivalent operation can be performed with (potentially)
2870 // zero instructions due to register overlaps. This optimization is never valid
2871 // for lane 1 because it would require FR=0 mode which isn't supported by MSA.
2872 MachineBasicBlock
* MipsSETargetLowering::
2873 emitCOPY_FW(MachineInstr
*MI
, MachineBasicBlock
*BB
) const{
2874 const TargetInstrInfo
*TII
=
2875 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2876 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2877 DebugLoc DL
= MI
->getDebugLoc();
2878 unsigned Fd
= MI
->getOperand(0).getReg();
2879 unsigned Ws
= MI
->getOperand(1).getReg();
2880 unsigned Lane
= MI
->getOperand(2).getImm();
2883 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Ws
, 0, Mips::sub_lo
);
2885 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
2887 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_W
), Wt
).addReg(Ws
).addImm(Lane
);
2888 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Wt
, 0, Mips::sub_lo
);
2891 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2895 // Emit the COPY_FD pseudo instruction.
2897 // copy_fd_pseudo $fd, $ws, n
2899 // splati.d $wt, $ws, $n
2900 // copy $fd, $wt:sub_64
2902 // When n is zero, the equivalent operation can be performed with (potentially)
2903 // zero instructions due to register overlaps. This optimization is always
2904 // valid because FR=1 mode is the only mode supported by MSA.
2905 MachineBasicBlock
* MipsSETargetLowering::
2906 emitCOPY_FD(MachineInstr
*MI
, MachineBasicBlock
*BB
) const{
2907 assert(Subtarget
.isFP64bit());
2909 const TargetInstrInfo
*TII
=
2910 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2911 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2912 unsigned Fd
= MI
->getOperand(0).getReg();
2913 unsigned Ws
= MI
->getOperand(1).getReg();
2914 unsigned Lane
= MI
->getOperand(2).getImm() * 2;
2915 DebugLoc DL
= MI
->getDebugLoc();
2918 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Ws
, 0, Mips::sub_64
);
2920 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
2922 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_D
), Wt
).addReg(Ws
).addImm(1);
2923 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::COPY
), Fd
).addReg(Wt
, 0, Mips::sub_64
);
2926 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2930 // Emit the INSERT_FW pseudo instruction.
2932 // insert_fw_pseudo $wd, $wd_in, $n, $fs
2934 // subreg_to_reg $wt:sub_lo, $fs
2935 // insve_w $wd[$n], $wd_in, $wt[0]
2937 MipsSETargetLowering::emitINSERT_FW(MachineInstr
*MI
,
2938 MachineBasicBlock
*BB
) const {
2939 const TargetInstrInfo
*TII
=
2940 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2941 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2942 DebugLoc DL
= MI
->getDebugLoc();
2943 unsigned Wd
= MI
->getOperand(0).getReg();
2944 unsigned Wd_in
= MI
->getOperand(1).getReg();
2945 unsigned Lane
= MI
->getOperand(2).getImm();
2946 unsigned Fs
= MI
->getOperand(3).getReg();
2947 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
2949 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
2952 .addImm(Mips::sub_lo
);
2953 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSVE_W
), Wd
)
2959 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2963 // Emit the INSERT_FD pseudo instruction.
2965 // insert_fd_pseudo $wd, $wd_in, $n, $fs
2967 // subreg_to_reg $wt:sub_64, $fs
2968 // insve_d $wd[$n], $wd_in, $wt[0]
2970 MipsSETargetLowering::emitINSERT_FD(MachineInstr
*MI
,
2971 MachineBasicBlock
*BB
) const {
2972 assert(Subtarget
.isFP64bit());
2974 const TargetInstrInfo
*TII
=
2975 getTargetMachine().getSubtargetImpl()->getInstrInfo();
2976 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
2977 DebugLoc DL
= MI
->getDebugLoc();
2978 unsigned Wd
= MI
->getOperand(0).getReg();
2979 unsigned Wd_in
= MI
->getOperand(1).getReg();
2980 unsigned Lane
= MI
->getOperand(2).getImm();
2981 unsigned Fs
= MI
->getOperand(3).getReg();
2982 unsigned Wt
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
2984 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
2987 .addImm(Mips::sub_64
);
2988 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSVE_D
), Wd
)
2994 MI
->eraseFromParent(); // The pseudo instruction is gone now.
2998 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3001 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3003 // (SLL $lanetmp1, $lane, <log2size>)
3004 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3005 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3006 // (NEG $lanetmp2, $lanetmp1)
3007 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3009 // For floating point:
3010 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3012 // (SUBREG_TO_REG $wt, $fs, <subreg>)
3013 // (SLL $lanetmp1, $lane, <log2size>)
3014 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3015 // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3016 // (NEG $lanetmp2, $lanetmp1)
3017 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3019 MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr
*MI
,
3020 MachineBasicBlock
*BB
,
3021 unsigned EltSizeInBytes
,
3023 const TargetInstrInfo
*TII
=
3024 getTargetMachine().getSubtargetImpl()->getInstrInfo();
3025 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3026 DebugLoc DL
= MI
->getDebugLoc();
3027 unsigned Wd
= MI
->getOperand(0).getReg();
3028 unsigned SrcVecReg
= MI
->getOperand(1).getReg();
3029 unsigned LaneReg
= MI
->getOperand(2).getReg();
3030 unsigned SrcValReg
= MI
->getOperand(3).getReg();
3032 const TargetRegisterClass
*VecRC
= nullptr;
3033 const TargetRegisterClass
*GPRRC
=
3034 Subtarget
.isGP64bit() ? &Mips::GPR64RegClass
: &Mips::GPR32RegClass
;
3035 unsigned EltLog2Size
;
3036 unsigned InsertOp
= 0;
3037 unsigned InsveOp
= 0;
3038 switch (EltSizeInBytes
) {
3040 llvm_unreachable("Unexpected size");
3043 InsertOp
= Mips::INSERT_B
;
3044 InsveOp
= Mips::INSVE_B
;
3045 VecRC
= &Mips::MSA128BRegClass
;
3049 InsertOp
= Mips::INSERT_H
;
3050 InsveOp
= Mips::INSVE_H
;
3051 VecRC
= &Mips::MSA128HRegClass
;
3055 InsertOp
= Mips::INSERT_W
;
3056 InsveOp
= Mips::INSVE_W
;
3057 VecRC
= &Mips::MSA128WRegClass
;
3061 InsertOp
= Mips::INSERT_D
;
3062 InsveOp
= Mips::INSVE_D
;
3063 VecRC
= &Mips::MSA128DRegClass
;
3068 unsigned Wt
= RegInfo
.createVirtualRegister(VecRC
);
3069 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUBREG_TO_REG
), Wt
)
3072 .addImm(EltSizeInBytes
== 8 ? Mips::sub_64
: Mips::sub_lo
);
3076 // Convert the lane index into a byte index
3077 if (EltSizeInBytes
!= 1) {
3078 unsigned LaneTmp1
= RegInfo
.createVirtualRegister(GPRRC
);
3079 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SLL
), LaneTmp1
)
3081 .addImm(EltLog2Size
);
3085 // Rotate bytes around so that the desired lane is element zero
3086 unsigned WdTmp1
= RegInfo
.createVirtualRegister(VecRC
);
3087 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SLD_B
), WdTmp1
)
3092 unsigned WdTmp2
= RegInfo
.createVirtualRegister(VecRC
);
3094 // Use insve.df to insert to element zero
3095 BuildMI(*BB
, MI
, DL
, TII
->get(InsveOp
), WdTmp2
)
3101 // Use insert.df to insert to element zero
3102 BuildMI(*BB
, MI
, DL
, TII
->get(InsertOp
), WdTmp2
)
3108 // Rotate elements the rest of the way for a full rotation.
3109 // sld.df interprets $rt modulo the number of columns so we only need to negate
3110 // the lane index to do this.
3111 unsigned LaneTmp2
= RegInfo
.createVirtualRegister(GPRRC
);
3112 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SUB
), LaneTmp2
)
3115 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SLD_B
), Wd
)
3120 MI
->eraseFromParent(); // The pseudo instruction is gone now.
3124 // Emit the FILL_FW pseudo instruction.
3126 // fill_fw_pseudo $wd, $fs
3128 // implicit_def $wt1
3129 // insert_subreg $wt2:subreg_lo, $wt1, $fs
3130 // splati.w $wd, $wt2[0]
3132 MipsSETargetLowering::emitFILL_FW(MachineInstr
*MI
,
3133 MachineBasicBlock
*BB
) const {
3134 const TargetInstrInfo
*TII
=
3135 getTargetMachine().getSubtargetImpl()->getInstrInfo();
3136 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3137 DebugLoc DL
= MI
->getDebugLoc();
3138 unsigned Wd
= MI
->getOperand(0).getReg();
3139 unsigned Fs
= MI
->getOperand(1).getReg();
3140 unsigned Wt1
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3141 unsigned Wt2
= RegInfo
.createVirtualRegister(&Mips::MSA128WRegClass
);
3143 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::IMPLICIT_DEF
), Wt1
);
3144 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_SUBREG
), Wt2
)
3147 .addImm(Mips::sub_lo
);
3148 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_W
), Wd
).addReg(Wt2
).addImm(0);
3150 MI
->eraseFromParent(); // The pseudo instruction is gone now.
3154 // Emit the FILL_FD pseudo instruction.
3156 // fill_fd_pseudo $wd, $fs
3158 // implicit_def $wt1
3159 // insert_subreg $wt2:subreg_64, $wt1, $fs
3160 // splati.d $wd, $wt2[0]
3162 MipsSETargetLowering::emitFILL_FD(MachineInstr
*MI
,
3163 MachineBasicBlock
*BB
) const {
3164 assert(Subtarget
.isFP64bit());
3166 const TargetInstrInfo
*TII
=
3167 getTargetMachine().getSubtargetImpl()->getInstrInfo();
3168 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3169 DebugLoc DL
= MI
->getDebugLoc();
3170 unsigned Wd
= MI
->getOperand(0).getReg();
3171 unsigned Fs
= MI
->getOperand(1).getReg();
3172 unsigned Wt1
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3173 unsigned Wt2
= RegInfo
.createVirtualRegister(&Mips::MSA128DRegClass
);
3175 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::IMPLICIT_DEF
), Wt1
);
3176 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::INSERT_SUBREG
), Wt2
)
3179 .addImm(Mips::sub_64
);
3180 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::SPLATI_D
), Wd
).addReg(Wt2
).addImm(0);
3182 MI
->eraseFromParent(); // The pseudo instruction is gone now.
3186 // Emit the FEXP2_W_1 pseudo instructions.
3188 // fexp2_w_1_pseudo $wd, $wt
3191 // fexp2.w $wd, $ws, $wt
3193 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr
*MI
,
3194 MachineBasicBlock
*BB
) const {
3195 const TargetInstrInfo
*TII
=
3196 getTargetMachine().getSubtargetImpl()->getInstrInfo();
3197 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3198 const TargetRegisterClass
*RC
= &Mips::MSA128WRegClass
;
3199 unsigned Ws1
= RegInfo
.createVirtualRegister(RC
);
3200 unsigned Ws2
= RegInfo
.createVirtualRegister(RC
);
3201 DebugLoc DL
= MI
->getDebugLoc();
3203 // Splat 1.0 into a vector
3204 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::LDI_W
), Ws1
).addImm(1);
3205 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FFINT_U_W
), Ws2
).addReg(Ws1
);
3207 // Emit 1.0 * fexp2(Wt)
3208 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXP2_W
), MI
->getOperand(0).getReg())
3210 .addReg(MI
->getOperand(1).getReg());
3212 MI
->eraseFromParent(); // The pseudo instruction is gone now.
3216 // Emit the FEXP2_D_1 pseudo instructions.
3218 // fexp2_d_1_pseudo $wd, $wt
3221 // fexp2.d $wd, $ws, $wt
3223 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr
*MI
,
3224 MachineBasicBlock
*BB
) const {
3225 const TargetInstrInfo
*TII
=
3226 getTargetMachine().getSubtargetImpl()->getInstrInfo();
3227 MachineRegisterInfo
&RegInfo
= BB
->getParent()->getRegInfo();
3228 const TargetRegisterClass
*RC
= &Mips::MSA128DRegClass
;
3229 unsigned Ws1
= RegInfo
.createVirtualRegister(RC
);
3230 unsigned Ws2
= RegInfo
.createVirtualRegister(RC
);
3231 DebugLoc DL
= MI
->getDebugLoc();
3233 // Splat 1.0 into a vector
3234 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::LDI_D
), Ws1
).addImm(1);
3235 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FFINT_U_D
), Ws2
).addReg(Ws1
);
3237 // Emit 1.0 * fexp2(Wt)
3238 BuildMI(*BB
, MI
, DL
, TII
->get(Mips::FEXP2_D
), MI
->getOperand(0).getReg())
3240 .addReg(MI
->getOperand(1).getReg());
3242 MI
->eraseFromParent(); // The pseudo instruction is gone now.