//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;
class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo,
                         GCFunctionInfo &gcInfo)
    : FastISel(funcInfo, libInfo, gcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }
    // Code from FastISel.cpp.
  private:
    unsigned FastEmitInst_(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC);
    unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);
    unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              unsigned Op2, bool Op2IsKill);
    unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             const ConstantFP *FPImm);
    unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              uint64_t Imm);
    unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            uint64_t Imm);
    unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             uint64_t Imm1, uint64_t Imm2);
    unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                        unsigned Op0, bool Op0IsKill,
                                        uint32_t Idx);

    // Backend specific FastISel code.
  private:
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                               const LoadInst *LI);

  private:
  #include "ARMGenFastISel.inc"
    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
    unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags, bool useAM3);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"
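// TargetSelectInstruction (defined further down) is the hook the generic
// FastISel driver invokes for each IR instruction; it dispatches to the
// Select* routines declared above, which in turn lean on the ARMEmit* and
// ARMMaterialize* helpers to build MachineInstrs directly, without going
// through the SelectionDAG.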
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 or not NEON function we were handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    if (MCID.OpInfo[i].isPredicate())
      return true;

  return false;
}
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}
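// Every emission helper below follows the same two-step pattern: BuildMI
// creates the raw MachineInstr, and AddOptionalDefs then appends the
// predicate and optional CC-def operands the ARM instruction descriptions
// expect, e.g.
//   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, Reg));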
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}
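// The suffix on each FastEmitInst_* overload encodes its operand kinds:
// 'r' is a register operand (paired with a kill flag), 'i' an integer
// immediate, and 'f' a floating-point immediate. FastEmitInst_rri, for
// instance, emits an instruction taking two registers and one immediate.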
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return false;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return false;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return false;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}
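// ARMMaterializeInt tries the cheapest encodings first: a single MOVi16 for
// constants that fit in 16 bits, then an MVN of the bitwise complement for
// encodable negative values, and only then a constant-pool load for the
// general 32-bit case.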
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();
  bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // Use movw+movt when possible, it avoids constant pool entries.
  // Darwin targets don't support movt with Reloc::Static, see
  // ARMTargetLowering::LowerGlobalAddressDarwin.  Other targets only support
  // static movt relocations.
  if (Subtarget->useMovt() &&
      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
    unsigned Opc;
    switch (RelocM) {
    case Reloc::PIC_:
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
      break;
    case Reloc::DynamicNoPIC:
      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
      break;
    default:
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      break;
    }
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg).addGlobalAddress(GV));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = TD.getTypeAllocSize(GV->getType());
    }

    if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
      return ARMLowerPICELF(GV, Align, VT);

    // Grab index.
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
      (Subtarget->isThumb() ? 4 : 8);
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx);
      if (RelocM == Reloc::PIC_)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                    DestReg)
        .addConstantPoolIndex(Idx)
        .addImm(0);
      AddOptionalDefs(MIB);

      if (RelocM == Reloc::PIC_) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DL, TII.get(Opc), NewDestReg)
                                  .addReg(DestReg)
                                  .addImm(Id);
        AddOptionalDefs(MIB);
        DestReg = NewDestReg;
      }
    }
  }

  if (IsIndirect) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
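// isLoadTypeLegal deliberately accepts i1/i8/i16 even though they are not
// legal register types on ARM: loads of those widths can always be widened
// to i32 by the extending load opcodes chosen in ARMEmitLoad below.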
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
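// ARMComputeAddress mirrors the X86 FastISel address folder: it walks
// through bitcasts, no-op int<->ptr casts and constant GEPs, accumulating
// a single base register (or frame index) plus an integer offset in Addr;
// anything it cannot fold is simply materialized into a register.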
void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ?
      (const TargetRegisterClass*)&ARM::tGPRRegClass :
      (const TargetRegisterClass*)&ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags, bool useAM3) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
                                  MachinePointerInfo::getFixedStack(FI, Offset),
                                  Flags,
                                  MFI.getObjectSize(FI),
                                  MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}
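// Note the useAM3 paths above: addrmode3 (halfword and signed-byte
// accesses) encodes the sign of its 8-bit offset inside the instruction,
// which is why a negative Addr.Offset is folded into (0x100 | -Addr.Offset)
// rather than passed through as-is.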
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = &ARM::GPRRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned loads need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert (ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}
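// SelectLoad shows the typical shape of a Select* routine: reject the cases
// FastISel cannot handle (here, atomics and illegal types), fold the pointer
// operand with ARMComputeAddress, emit the machine instruction, and publish
// the result register through UpdateValueMap so later uses can find it.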
bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ?
        (const TargetRegisterClass*)&ARM::tGPRRegClass :
        (const TargetRegisterClass*)&ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned stores need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
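// The mapping above leaves FCMP_ONE and FCMP_UEQ on ARMCC::AL ("always",
// used here as the failure sentinel) because each of those predicates would
// need two ARM condition checks; callers treat AL as "not handled" and bail.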
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
                  .addReg(AddrReg));
  return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcVT = TLI.getValueType(Ty, true);
  if (!SrcVT.isSimple()) return false;

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // than a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
    // Intentional fall-through.
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  if (!UseImm) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(CmpOpc))
                    .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
      .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));
  return true;
}
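// ARMEmitCmp always leaves its result in CPSR: integer compares set it
// directly, while the VFP compares set FPSCR and are followed by FMSTAT to
// copy the flags across, so every caller can branch or select on CPSR
// uniformly.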
bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  const TargetRegisterClass *RC = isThumb2 ?
    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
          .addReg(ZeroReg).addImm(1)
          .addImm(ARMPred).addReg(ARM::CPSR);

  UpdateValueMap(I, DestReg);
  return true;
}
bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::DPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::SPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(Src->getType(), true);
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    EVT DestVT = MVT::i32;
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
                           /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}
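// Both conversion directions above route the integer value through an f32
// register (via ARMMoveToFPReg/ARMMoveToIntReg, i.e. VMOVSR/VMOVRS) because
// the VFP convert instructions only operate within the floating-point
// register file.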
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert (VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(0));

  unsigned MovCCOpc;
  if (!UseImm) {
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    if (!isNegativeImm) {
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    } else {
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
    }
  }
  unsigned ResultReg = createResultReg(RC);
  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}
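// SelectDiv and SelectRem fall back to RTLIB runtime helpers (for example,
// the compiler-rt/libgcc __divsi3 family) whenever the subtarget lacks a
// hardware divider; ARMEmitLibcall, declared above, builds the actual call.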
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
      break;
    case ISD::OR:
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
      break;
    case ISD::SUB:
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
      break;
  }

  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(SrcReg1).addReg(SrcReg2));
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}
// Call Handling Code

// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    if (Return)
      llvm_unreachable("Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  }
}
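// Summary of the dispatch above: the VFP (hard-float) assignment functions
// are chosen only when the subtarget has VFP2, the float ABI is hard, and
// the callee is not variadic; everything else degrades to plain AAPCS or
// APCS, which pass all arguments in core registers and on the stack.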
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2())
          return false;
        break;
      }
    }
  }

  // At this point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert (Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
        // Intentional fall-through.  Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert (Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      EVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}
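// Under a soft-float return convention an f64 result arrives split across
// two core registers (normally r0/r1); the VMOVDRR above reassembles it
// into a D register. Extended integer results (i1/i8/i16) are copied out
// at i32 width since that is how the callee returns them.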
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVVT = TLI.getValueType(RV->getType());
    EVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext.  Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}
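// Sub-i32 return values are widened to i32 first, honoring the zext/sext
// return attribute when present, so the value copied into the return
// register is well-defined at the ABI width.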
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? ARM::tBLXr : ARM::BLX;
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}
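// Direct calls use BL / tBL with a symbol operand; calls through a register
// (function pointers and long calls) need the BLX / tBLXr forms instead.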
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
                                       GlobalValue::ExternalLinkage, 0, Name);
  return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
}
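// The libcall's address is materialized like any other external global: a
// temporary i32 GlobalVariable stands in for the symbol so ARMMaterializeGV
// can load its address into a register for the long-call path.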
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  unsigned CalleeReg = 0;
  if (EnableARMLongCalls) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    AddDefaultPred(MIB);
  if (EnableARMLongCalls)
    MIB.addReg(CalleeReg);
  else
    MIB.addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i], RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
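// Typical caller: SelectDiv routes integer division here when the subtarget
// has no hardware divider, e.g. with RTLIB::SDIV_I32, whose default libcall
// name is "__divsi3" (AEABI targets usually rename it, e.g. "__aeabi_idiv",
// in the target lowering).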
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = 0) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CS.arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last two arguments, which shouldn't be passed to the underlying function.
    if (IntrMemName && e-i <= 2)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || EnableARMLongCalls) UseReg = true;

  unsigned CalleeReg = 0;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    AddDefaultPred(MIB);
  if (UseReg)
    MIB.addReg(CalleeReg);
  else if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i], RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
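// When a memory intrinsic is routed here from SelectIntrinsicCall,
// IntrMemName names the libc entry point ("memcpy", "memmove", "memset")
// and the last two intrinsic operands (alignment and the volatile flag)
// are dropped, since the library functions do not take them.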
bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else {
      assert(Len == 1);
      VT = MVT::i8;
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert (RV == true && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert (RV == true && "Should be able to handle this store.");
    (void)RV;

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}
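// For example, a constant 7-byte memcpy unrolls into an i32, an i16, and an
// i8 load/store pair, advancing both addresses by the access size each time.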
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
    MFI->setFrameAddressIsTaken(true);

    unsigned LdrOpc;
    const TargetRegisterClass *RC;
    if (isThumb2) {
      LdrOpc = ARM::t2LDRi12;
      RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
    } else {
      LdrOpc = ARM::LDRi12;
      RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
    }

    const ARMBaseRegisterInfo *RegInfo =
          static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    UpdateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
    return true;
  }
  }
}
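// For llvm.frameaddress with a non-zero depth, the loop above chases the
// saved-frame-pointer chain: each iteration loads through the previous
// frame pointer, so a depth of N costs N dependent loads.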
bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(Op->getType(), true);
  DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  UpdateValueMap(I, SrcReg);
  return true;
}
unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;

  unsigned Opc;
  bool isBoolZext = false;
  if (!SrcVT.isSimple()) return 0;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i16:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
    else
      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
    break;
  case MVT::i8:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
    else
      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
    break;
  case MVT::i1:
    if (isZExt) {
      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      isBoolZext = true;
      break;
    }
    return 0;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
        .addReg(SrcReg);
  if (isBoolZext)
    MIB.addImm(1);
  else
    MIB.addImm(0);
  AddOptionalDefs(MIB);
  return ResultReg;
}
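// i1 zero-extension is just an AND with 1 (there is no dedicated extend
// instruction for it); i8/i16 use UXTB/UXTH or SXTB/SXTH, which only exist
// on ARMv6 and later, hence the hasV6Ops() guards above.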
bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(SrcTy, true);
  DestVT = TLI.getValueType(DestTy, true);

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectShift(const Instruction *I,
                              ARM_AM::ShiftOpc ShiftTy) {
  // We handle thumb2 mode by target independent selector
  // or SelectionDAG ISel.
  if (isThumb2)
    return false;

  // Only handle i32 now.
  EVT DestVT = TLI.getValueType(I->getType(), true);
  if (DestVT != MVT::i32)
    return false;

  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to selection DAG isel if the shift amount
    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  unsigned Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  unsigned Reg2 = 0;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  UpdateValueMap(I, ResultReg);
  return true;
}
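// Constant shift amounts (1..31) fold into a single MOVsi whose shifter
// operand encodes both the shift type and the amount; variable amounts use
// MOVsr with the amount in a register. Thumb2 shifts are left to the
// common selectors.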
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}
/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction.  If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  bool isZExt = true;
  switch (MI->getOpcode()) {
    default: return false;
    case ARM::SXTH:
    case ARM::t2SXTH:
      isZExt = false;
    case ARM::UXTH:
    case ARM::t2UXTH:
      if (VT != MVT::i16)
        return false;
      break;
    case ARM::SXTB:
    case ARM::t2SXTB:
      isZExt = false;
    case ARM::UXTB:
    case ARM::t2UXTB:
      if (VT != MVT::i8)
        return false;
      break;
  }
  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  unsigned ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MI->eraseFromParent();
  return true;
}
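// The fold rewrites, e.g., "ldrb r1, [r0]; uxtb r2, r1" into one
// zero-extending ldrb that defines the extend's result register directly
// and deletes the extend; sign extends fold to ldrsb/ldrsh the same way.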
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                     unsigned Align, EVT VT) {
  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
  ARMConstantPoolConstant *CPV =
    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  unsigned Opc;
  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
  // Load value.
  if (isThumb2) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg1)
                    .addConstantPoolIndex(Idx));
    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
  } else {
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                            DL, TII.get(ARM::LDRcp), DestReg1)
                    .addConstantPoolIndex(Idx).addImm(0));
    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
  }

  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
  if (GlobalBaseReg == 0) {
    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
    AFI->setGlobalBaseReg(GlobalBaseReg);
  }

  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(Opc), DestReg2)
                            .addReg(DestReg1)
                            .addReg(GlobalBaseReg);
  if (!UseGOTOFF)
    MIB.addImm(0);
  AddOptionalDefs(MIB);

  return DestReg2;
}
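// With GOTOFF (local or hidden globals) the constant pool entry holds the
// offset from the GOT base, so an ADD of the PIC base register finishes the
// address; with GOT (preemptible globals) the entry indexes a GOT slot, so
// the second instruction is a load through the GOT instead.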
namespace llvm {
  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo,
                                GCFunctionInfo &gcInfo) {
    // Completely untested on non-iOS.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // iOS only for now, and not Thumb1.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
      return new ARMFastISel(funcInfo, libInfo, gcInfo);
    return 0;
  }
}