]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file contains the ARM implementation of TargetFrameLowering class. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | #include "ARMFrameLowering.h" | |
15 | #include "ARMBaseInstrInfo.h" | |
16 | #include "ARMBaseRegisterInfo.h" | |
1a4d82fc | 17 | #include "ARMConstantPoolValue.h" |
223e47cc | 18 | #include "ARMMachineFunctionInfo.h" |
223e47cc | 19 | #include "MCTargetDesc/ARMAddressingModes.h" |
223e47cc LB |
20 | #include "llvm/CodeGen/MachineFrameInfo.h" |
21 | #include "llvm/CodeGen/MachineFunction.h" | |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |
1a4d82fc | 23 | #include "llvm/CodeGen/MachineModuleInfo.h" |
223e47cc LB |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/RegisterScavenging.h" | |
970d7e83 LB |
26 | #include "llvm/IR/CallingConv.h" |
27 | #include "llvm/IR/Function.h" | |
1a4d82fc | 28 | #include "llvm/MC/MCContext.h" |
223e47cc | 29 | #include "llvm/Support/CommandLine.h" |
970d7e83 | 30 | #include "llvm/Target/TargetOptions.h" |
223e47cc LB |
31 | |
32 | using namespace llvm; | |
33 | ||
// Command-line flag (hidden, default true): when set, NEON (DPR) callee-saved
// spills in the prologue/epilogue are kept aligned so they can use aligned
// vector load/store forms.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));
37 | ||
// Forward declaration: returns an iterator past the instruction sequence that
// spills the NumAlignedDPRCS2Regs aligned DPR callee-saved registers
// (definition lives later in this file; used by emitPrologue).
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
41 | ||
// Construct the ARM frame lowering: the stack grows down, stack alignment
// comes from the subtarget, local-area offset is 0, and the transient stack
// alignment is 4 bytes.
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
      STI(sti) {}
45 | ||
223e47cc LB |
46 | /// hasFP - Return true if the specified function should have a dedicated frame |
47 | /// pointer register. This is true if the function has variable sized allocas | |
48 | /// or if frame pointer elimination is disabled. | |
49 | bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { | |
1a4d82fc | 50 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); |
223e47cc LB |
51 | |
52 | // iOS requires FP not to be clobbered for backtracing purpose. | |
53 | if (STI.isTargetIOS()) | |
54 | return true; | |
55 | ||
56 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |
57 | // Always eliminate non-leaf frame pointers. | |
58 | return ((MF.getTarget().Options.DisableFramePointerElim(MF) && | |
59 | MFI->hasCalls()) || | |
60 | RegInfo->needsStackRealignment(MF) || | |
61 | MFI->hasVarSizedObjects() || | |
62 | MFI->isFrameAddressTaken()); | |
63 | } | |
64 | ||
65 | /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is | |
66 | /// not required, we reserve argument space for call sites in the function | |
67 | /// immediately on entry to the current function. This eliminates the need for | |
68 | /// add/sub sp brackets around call sites. Returns true if the call frame is | |
69 | /// included as part of the stack frame. | |
70 | bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |
71 | const MachineFrameInfo *FFI = MF.getFrameInfo(); | |
72 | unsigned CFSize = FFI->getMaxCallFrameSize(); | |
73 | // It's not always a good idea to include the call frame as part of the | |
74 | // stack frame. ARM (especially Thumb) has small immediate offset to | |
75 | // address the stack frame. So a large call frame can cause poor codegen | |
76 | // and may even makes it impossible to scavenge a register. | |
77 | if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 | |
78 | return false; | |
79 | ||
80 | return !MF.getFrameInfo()->hasVarSizedObjects(); | |
81 | } | |
82 | ||
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  // Variable-sized objects force an FP, which also makes the pseudos trivial.
  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
91 | ||
223e47cc LB |
92 | static bool isCSRestore(MachineInstr *MI, |
93 | const ARMBaseInstrInfo &TII, | |
1a4d82fc | 94 | const MCPhysReg *CSRegs) { |
223e47cc | 95 | // Integer spill area is handled with "pop". |
1a4d82fc | 96 | if (isPopOpcode(MI->getOpcode())) { |
223e47cc LB |
97 | // The first two operands are predicates. The last two are |
98 | // imp-def and imp-use of SP. Check everything in between. | |
99 | for (int i = 5, e = MI->getNumOperands(); i != e; ++i) | |
100 | if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) | |
101 | return false; | |
102 | return true; | |
103 | } | |
104 | if ((MI->getOpcode() == ARM::LDR_POST_IMM || | |
105 | MI->getOpcode() == ARM::LDR_POST_REG || | |
106 | MI->getOpcode() == ARM::t2LDR_POST) && | |
107 | isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && | |
108 | MI->getOperand(1).getReg() == ARM::SP) | |
109 | return true; | |
110 | ||
111 | return false; | |
112 | } | |
113 | ||
1a4d82fc JJ |
114 | static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, |
115 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |
116 | const ARMBaseInstrInfo &TII, unsigned DestReg, | |
117 | unsigned SrcReg, int NumBytes, | |
118 | unsigned MIFlags = MachineInstr::NoFlags, | |
119 | ARMCC::CondCodes Pred = ARMCC::AL, | |
120 | unsigned PredReg = 0) { | |
223e47cc | 121 | if (isARM) |
1a4d82fc | 122 | emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, |
970d7e83 | 123 | Pred, PredReg, TII, MIFlags); |
223e47cc | 124 | else |
1a4d82fc | 125 | emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, |
970d7e83 | 126 | Pred, PredReg, TII, MIFlags); |
223e47cc LB |
127 | } |
128 | ||
// Adjust the stack pointer by NumBytes (negative to allocate stack, positive
// to deallocate). Thin forwarder to emitRegPlusImmediate with SP as both
// source and destination register.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
138 | ||
139 | static int sizeOfSPAdjustment(const MachineInstr *MI) { | |
85aaf69f SL |
140 | int RegSize; |
141 | switch (MI->getOpcode()) { | |
142 | case ARM::VSTMDDB_UPD: | |
143 | RegSize = 8; | |
144 | break; | |
145 | case ARM::STMDB_UPD: | |
146 | case ARM::t2STMDB_UPD: | |
147 | RegSize = 4; | |
148 | break; | |
149 | case ARM::t2STR_PRE: | |
150 | case ARM::STR_PRE_IMM: | |
151 | return 4; | |
152 | default: | |
153 | llvm_unreachable("Unknown push or pop like instruction"); | |
154 | } | |
155 | ||
1a4d82fc JJ |
156 | int count = 0; |
157 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ | |
158 | // pred) so the list starts at 4. | |
159 | for (int i = MI->getNumOperands() - 1; i >= 4; --i) | |
85aaf69f | 160 | count += RegSize; |
1a4d82fc JJ |
161 | return count; |
162 | } | |
163 | ||
164 | static bool WindowsRequiresStackProbe(const MachineFunction &MF, | |
165 | size_t StackSizeInBytes) { | |
166 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |
167 | if (MFI->getStackProtectorIndex() > 0) | |
168 | return StackSizeInBytes >= 4080; | |
169 | return StackSizeInBytes >= 4096; | |
170 | } | |
171 | ||
85aaf69f SL |
namespace {
// Records every SP-adjusting instruction emitted during the prologue so that
// matching .cfi_def_cfa_offset directives can be emitted once the prologue
// layout is final.
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I; // The instruction that adjusts SP.
    unsigned SPAdjust;             // Bytes of stack it allocates.
    bool BeforeFPSet;              // True if it executes before FP is set up.
  };

  SmallVector<InstInfo, 4> Insts;

  // Record instruction I as adjusting SP by SPAdjust bytes.
  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  // Add ExtraBytes to the adjustment already recorded for I (used when a
  // later SP update gets folded into an earlier push). I must have been
  // registered via addInst.
  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info = std::find_if(Insts.begin(), Insts.end(),
                             [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  // Emit a .cfi_def_cfa_offset after each recorded instruction, accumulating
  // the total SP displacement. If the function has a frame pointer, stop at
  // the first instruction that runs after FP is established — from there on
  // the CFA is described relative to FP instead of SP.
  void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
                         DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      // Stack grows down, so each adjustment moves the CFA offset downward
      // (the unsigned value is interpreted as an int by the MC layer).
      CFAOffset -= Info.SPAdjust;
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};
}
213 | ||
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
///
/// \param Reg        register to align (modified in place).
/// \param Alignment  power-of-two byte alignment to enforce.
/// \param MustBeSingleInstruction  assert that the chosen sequence is a
///        single instruction (required by the aligned-DPRCS2 spill path).
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     DebugLoc DL, const unsigned Reg,
                                     const unsigned Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>();
  // BFC (bit-field clear) exists from v6T2/v7 onwards.
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment - 1;
  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
                         .addReg(Reg, RegState::Kill)
                         .addImm(~AlignMask));
    } else if (AlignMask <= 255) {
      // BIC immediate can encode masks up to 8 bits.
      AddDefaultCC(
          AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
                             .addReg(Reg, RegState::Kill)
                             .addImm(AlignMask)));
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      // Two-instruction fallback: shift right then left by log2(Alignment).
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
                       .addReg(Reg, RegState::Kill)
                       .addImm(~AlignMask));
  }
}
275 | ||
/// emitPrologue - Insert prologue code into the function's entry block:
/// spill callee-saved registers (GPR area 1, GPR area 2 on Darwin, DPR area),
/// establish the frame pointer if needed, allocate the local stack area
/// (probing via __chkstk on Windows), realign the stack when required, and
/// emit matching DWARF CFI directives for unwinding.
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      TM.getSubtargetImpl()->getRegisterInfo());
  const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
      TM.getSubtargetImpl()->getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  unsigned Align =
      TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
  // Bytes used to save incoming argument registers (varargs area).
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
  unsigned NumBytes = MFI->getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Collects SP adjustments so .cfi_def_cfa_offset directives can be emitted
  // after the prologue layout is final.
  StackAdjustingInsts DefCFAOffsetCandidates;

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
  }

  // Fast path for leaf-like functions with no stack frame (unless Windows
  // still requires a stack probe): just allocate the locals and return.
  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI),
                                     NumBytes - ArgRegsSaveSize, true);
    }
    return;
  }

  // Determine spill area sizes.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      // On Darwin, r8-r12 go into the second GPR spill area.
      if (STI.isTargetDarwin()) {
        GPRCS2Size += 4;
        break;
      }
      // fallthrough
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      // Remember where the frame pointer register was spilled.
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      GPRCS1Size += 4;
      break;
    default:
      // This is a DPR. Exclude the aligned DPRCS2 spills.
      if (Reg == ARM::D8)
        D8SpillFI = FI;
      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
        DPRCSSize += 8;
    }
  }

  // Move past area 1.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine starting offsets of spill areas.
  bool HasFP = hasFP(MF);
  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  // DPR spills want 8-byte alignment (capped at the stack alignment); a
  // 4-byte gap may be needed between the GPR and DPR areas to achieve it.
  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    FramePtrOffsetInPush =
        MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
  // .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI->getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  // Windows: large frames must be probed page-by-page via __chkstk, which
  // takes the allocation size in words in r4 and leaves the probed size to
  // subtract from SP.
  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536)
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
                         .addImm(NumWords)
                         .setMIFlags(MachineInstr::FrameSetup));
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);

    switch (TM.getCodeModel()) {
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Default:
    case CodeModel::Kernel:
      // Near call to __chkstk.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .addImm((unsigned)ARMCC::AL).addReg(0)
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
    case CodeModel::JITDefault:
      // Large code model: materialize the address in r12 and call indirect.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .addImm((unsigned)ARMCC::AL).addReg(0)
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    // sp -= r4 (the probed byte count).
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
                                        ARM::SP)
                                .addReg(ARM::SP, RegState::Define)
                                .addReg(ARM::R4, RegState::Kill)
                                .setMIFlags(MachineInstr::FrameSetup)));
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      // mov sp, r7
      // sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  if (HasFP) {
    MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
    emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
                         dl, TII, FramePtr, ARM::SP,
                         PushSize + FramePtrOffsetInPush,
                         MachineInstr::FrameSetup);
    // Emit the CFI describing the CFA relative to the new frame pointer.
    if (FramePtrOffsetInPush + PushSize != 0) {
      unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true),
          -(ArgRegsSaveSize - FramePtrOffsetInPush)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        // On Darwin these live in area 2, handled below.
        if (STI.isTargetDarwin())
          break;
        // fallthrough
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  // .cfi_offset directives for the Darwin-only second GPR spill area.
  if (GPRCS2Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.isTargetDarwin()) {
          unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
          unsigned Offset = MFI->getObjectOffset(FI);
          unsigned CFIIndex = MMI.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      // Only D-registers outside the aligned DPRCS2 range are spilled here.
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        unsigned Offset = MFI->getObjectOffset(FI);
        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
                             AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
    unsigned MaxAlign = MFI->getMaxAlignment();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      // mov r4, sp
      // -- use emitAligningInstructions to produce best sequence to zero
      // -- out lower bits in r4
      // mov sp, r4
      // FIXME: It will be better just to find spare register here.
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
                         .addReg(ARM::SP, RegState::Kill));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                         .addReg(ARM::R4, RegState::Kill));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl,
              TII.get(ARM::MOVr), RegInfo->getBaseRegister())
        .addReg(ARM::SP)
        .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
    else
      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                             RegInfo->getBaseRegister())
                     .addReg(ARM::SP));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI->hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
679 | ||
// Resolve TCReturn pseudo-instruction: rewrite a TCRETURNdi/TCRETURNri
// pseudo at the end of MBB into the real tail-call branch instruction
// (direct or register-indirect), copying over any extra operands, and erase
// the pseudo. Blocks whose terminator is not a TCRETURN are left untouched.
void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc dl = MBBI->getDebugLoc();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());

  // Nothing to do unless the terminator is a tail-call pseudo.
  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
    return;

  // Tail call return: adjust the stack pointer and jump to callee.
  MBBI = MBB.getLastNonDebugInstr();
  MachineOperand &JumpTarget = MBBI->getOperand(0);

  // Jump to label or value in register.
  if (RetOpcode == ARM::TCRETURNdi) {
    // Pick the direct tail-jump opcode for the current mode; MachO Thumb
    // uses the Darwin-specific variant.
    unsigned TCOpcode = STI.isThumb() ?
        (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
        ARM::TAILJMPd;
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
    if (JumpTarget.isGlobal())
      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                           JumpTarget.getTargetFlags());
    else {
      assert(JumpTarget.isSymbol());
      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                            JumpTarget.getTargetFlags());
    }

    // Add the default predicate in Thumb mode.
    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
  } else if (RetOpcode == ARM::TCRETURNri) {
    BuildMI(MBB, MBBI, dl,
            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
      addReg(JumpTarget.getReg(), RegState::Kill);
  }

  // Copy the remaining operands (e.g. implicit uses of argument registers)
  // from the pseudo onto the newly built branch.
  MachineInstr *NewMI = std::prev(MBBI);
  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
    NewMI->addOperand(MBBI->getOperand(i));

  // Delete the pseudo instruction TCRETURN.
  MBB.erase(MBBI);
  MBBI = NewMI;
}
728 | ||
223e47cc LB |
729 | void ARMFrameLowering::emitEpilogue(MachineFunction &MF, |
730 | MachineBasicBlock &MBB) const { | |
731 | MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); | |
732 | assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); | |
223e47cc LB |
733 | DebugLoc dl = MBBI->getDebugLoc(); |
734 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |
735 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1a4d82fc | 736 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); |
223e47cc | 737 | const ARMBaseInstrInfo &TII = |
1a4d82fc | 738 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
223e47cc LB |
739 | assert(!AFI->isThumb1OnlyFunction() && |
740 | "This emitEpilogue does not support Thumb1!"); | |
741 | bool isARM = !AFI->isThumbFunction(); | |
742 | ||
1a4d82fc JJ |
743 | unsigned Align = MF.getTarget() |
744 | .getSubtargetImpl() | |
745 | ->getFrameLowering() | |
746 | ->getStackAlignment(); | |
747 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); | |
223e47cc LB |
748 | int NumBytes = (int)MFI->getStackSize(); |
749 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |
750 | ||
970d7e83 LB |
751 | // All calls are tail calls in GHC calling conv, and functions have no |
752 | // prologue/epilogue. | |
85aaf69f SL |
753 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) { |
754 | fixTCReturn(MF, MBB); | |
223e47cc | 755 | return; |
85aaf69f | 756 | } |
223e47cc LB |
757 | |
758 | if (!AFI->hasStackFrame()) { | |
1a4d82fc JJ |
759 | if (NumBytes - ArgRegsSaveSize != 0) |
760 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); | |
223e47cc LB |
761 | } else { |
762 | // Unwind MBBI to point to first LDR / VLDRD. | |
1a4d82fc | 763 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); |
223e47cc | 764 | if (MBBI != MBB.begin()) { |
1a4d82fc | 765 | do { |
223e47cc | 766 | --MBBI; |
1a4d82fc | 767 | } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); |
223e47cc LB |
768 | if (!isCSRestore(MBBI, TII, CSRegs)) |
769 | ++MBBI; | |
770 | } | |
771 | ||
772 | // Move SP to start of FP callee save spill area. | |
1a4d82fc JJ |
773 | NumBytes -= (ArgRegsSaveSize + |
774 | AFI->getGPRCalleeSavedArea1Size() + | |
223e47cc | 775 | AFI->getGPRCalleeSavedArea2Size() + |
85aaf69f | 776 | AFI->getDPRCalleeSavedGapSize() + |
223e47cc LB |
777 | AFI->getDPRCalleeSavedAreaSize()); |
778 | ||
779 | // Reset SP based on frame pointer only if the stack frame extends beyond | |
780 | // frame pointer stack slot or target is ELF and the function has FP. | |
781 | if (AFI->shouldRestoreSPFromFP()) { | |
782 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; | |
783 | if (NumBytes) { | |
784 | if (isARM) | |
785 | emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, | |
786 | ARMCC::AL, 0, TII); | |
787 | else { | |
788 | // It's not possible to restore SP from FP in a single instruction. | |
789 | // For iOS, this looks like: | |
790 | // mov sp, r7 | |
791 | // sub sp, #24 | |
792 | // This is bad, if an interrupt is taken after the mov, sp is in an | |
793 | // inconsistent state. | |
794 | // Use the first callee-saved register as a scratch register. | |
795 | assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && | |
796 | "No scratch register to restore SP from FP!"); | |
797 | emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, | |
798 | ARMCC::AL, 0, TII); | |
799 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |
800 | ARM::SP) | |
801 | .addReg(ARM::R4)); | |
802 | } | |
803 | } else { | |
804 | // Thumb2 or ARM. | |
805 | if (isARM) | |
806 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) | |
807 | .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |
808 | else | |
809 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |
810 | ARM::SP) | |
811 | .addReg(FramePtr)); | |
812 | } | |
1a4d82fc JJ |
813 | } else if (NumBytes && |
814 | !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) | |
815 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); | |
223e47cc LB |
816 | |
817 | // Increment past our save areas. | |
818 | if (AFI->getDPRCalleeSavedAreaSize()) { | |
819 | MBBI++; | |
820 | // Since vpop register list cannot have gaps, there may be multiple vpop | |
821 | // instructions in the epilogue. | |
822 | while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) | |
823 | MBBI++; | |
824 | } | |
85aaf69f SL |
825 | if (AFI->getDPRCalleeSavedGapSize()) { |
826 | assert(AFI->getDPRCalleeSavedGapSize() == 4 && | |
827 | "unexpected DPR alignment gap"); | |
828 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); | |
829 | } | |
830 | ||
223e47cc LB |
831 | if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; |
832 | if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; | |
833 | } | |
834 | ||
85aaf69f | 835 | fixTCReturn(MF, MBB); |
223e47cc | 836 | |
1a4d82fc JJ |
837 | if (ArgRegsSaveSize) |
838 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); | |
223e47cc LB |
839 | } |
840 | ||
841 | /// getFrameIndexReference - Provide a base+offset reference to an FI slot for | |
842 | /// debug info. It's the same as what we use for resolving the code-gen | |
843 | /// references for now. FIXME: This can go wrong when references are | |
844 | /// SP-relative and simple call frames aren't used. | |
845 | int | |
846 | ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, | |
847 | unsigned &FrameReg) const { | |
848 | return ResolveFrameIndexReference(MF, FI, FrameReg, 0); | |
849 | } | |
850 | ||
851 | int | |
852 | ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, | |
853 | int FI, unsigned &FrameReg, | |
854 | int SPAdj) const { | |
855 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |
1a4d82fc JJ |
856 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
857 | MF.getSubtarget().getRegisterInfo()); | |
223e47cc LB |
858 | const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
859 | int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); | |
860 | int FPOffset = Offset - AFI->getFramePtrSpillOffset(); | |
861 | bool isFixed = MFI->isFixedObjectIndex(FI); | |
862 | ||
863 | FrameReg = ARM::SP; | |
864 | Offset += SPAdj; | |
223e47cc LB |
865 | |
866 | // SP can move around if there are allocas. We may also lose track of SP | |
867 | // when emergency spilling inside a non-reserved call frame setup. | |
868 | bool hasMovingSP = !hasReservedCallFrame(MF); | |
869 | ||
870 | // When dynamically realigning the stack, use the frame pointer for | |
871 | // parameters, and the stack/base pointer for locals. | |
872 | if (RegInfo->needsStackRealignment(MF)) { | |
873 | assert (hasFP(MF) && "dynamic stack realignment without a FP!"); | |
874 | if (isFixed) { | |
875 | FrameReg = RegInfo->getFrameRegister(MF); | |
876 | Offset = FPOffset; | |
877 | } else if (hasMovingSP) { | |
878 | assert(RegInfo->hasBasePointer(MF) && | |
879 | "VLAs and dynamic stack alignment, but missing base pointer!"); | |
880 | FrameReg = RegInfo->getBaseRegister(); | |
881 | } | |
882 | return Offset; | |
883 | } | |
884 | ||
885 | // If there is a frame pointer, use it when we can. | |
886 | if (hasFP(MF) && AFI->hasStackFrame()) { | |
887 | // Use frame pointer to reference fixed objects. Use it for locals if | |
888 | // there are VLAs (and thus the SP isn't reliable as a base). | |
889 | if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) { | |
890 | FrameReg = RegInfo->getFrameRegister(MF); | |
891 | return FPOffset; | |
892 | } else if (hasMovingSP) { | |
893 | assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); | |
894 | if (AFI->isThumb2Function()) { | |
895 | // Try to use the frame pointer if we can, else use the base pointer | |
896 | // since it's available. This is handy for the emergency spill slot, in | |
897 | // particular. | |
898 | if (FPOffset >= -255 && FPOffset < 0) { | |
899 | FrameReg = RegInfo->getFrameRegister(MF); | |
900 | return FPOffset; | |
901 | } | |
902 | } | |
903 | } else if (AFI->isThumb2Function()) { | |
904 | // Use add <rd>, sp, #<imm8> | |
905 | // ldr <rd>, [sp, #<imm8>] | |
906 | // if at all possible to save space. | |
907 | if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) | |
908 | return Offset; | |
909 | // In Thumb2 mode, the negative offset is very limited. Try to avoid | |
910 | // out of range references. ldr <rt>,[<rn>, #-<imm8>] | |
911 | if (FPOffset >= -255 && FPOffset < 0) { | |
912 | FrameReg = RegInfo->getFrameRegister(MF); | |
913 | return FPOffset; | |
914 | } | |
915 | } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { | |
916 | // Otherwise, use SP or FP, whichever is closer to the stack slot. | |
917 | FrameReg = RegInfo->getFrameRegister(MF); | |
918 | return FPOffset; | |
919 | } | |
920 | } | |
921 | // Use the base pointer if we have one. | |
922 | if (RegInfo->hasBasePointer(MF)) | |
923 | FrameReg = RegInfo->getBaseRegister(); | |
924 | return Offset; | |
925 | } | |
926 | ||
927 | int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, | |
928 | int FI) const { | |
929 | unsigned FrameReg; | |
930 | return getFrameIndexReference(MF, FI, FrameReg); | |
931 | } | |
932 | ||
933 | void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, | |
934 | MachineBasicBlock::iterator MI, | |
935 | const std::vector<CalleeSavedInfo> &CSI, | |
936 | unsigned StmOpc, unsigned StrOpc, | |
937 | bool NoGap, | |
938 | bool(*Func)(unsigned, bool), | |
939 | unsigned NumAlignedDPRCS2Regs, | |
940 | unsigned MIFlags) const { | |
941 | MachineFunction &MF = *MBB.getParent(); | |
1a4d82fc | 942 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
223e47cc LB |
943 | |
944 | DebugLoc DL; | |
945 | if (MI != MBB.end()) DL = MI->getDebugLoc(); | |
946 | ||
947 | SmallVector<std::pair<unsigned,bool>, 4> Regs; | |
948 | unsigned i = CSI.size(); | |
949 | while (i != 0) { | |
950 | unsigned LastReg = 0; | |
951 | for (; i != 0; --i) { | |
952 | unsigned Reg = CSI[i-1].getReg(); | |
1a4d82fc | 953 | if (!(Func)(Reg, STI.isTargetDarwin())) continue; |
223e47cc LB |
954 | |
955 | // D-registers in the aligned area DPRCS2 are NOT spilled here. | |
956 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |
957 | continue; | |
958 | ||
959 | // Add the callee-saved register as live-in unless it's LR and | |
960 | // @llvm.returnaddress is called. If LR is returned for | |
961 | // @llvm.returnaddress then it's already added to the function and | |
962 | // entry block live-in sets. | |
963 | bool isKill = true; | |
964 | if (Reg == ARM::LR) { | |
965 | if (MF.getFrameInfo()->isReturnAddressTaken() && | |
966 | MF.getRegInfo().isLiveIn(Reg)) | |
967 | isKill = false; | |
968 | } | |
969 | ||
970 | if (isKill) | |
971 | MBB.addLiveIn(Reg); | |
972 | ||
973 | // If NoGap is true, push consecutive registers and then leave the rest | |
974 | // for other instructions. e.g. | |
975 | // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} | |
976 | if (NoGap && LastReg && LastReg != Reg-1) | |
977 | break; | |
978 | LastReg = Reg; | |
979 | Regs.push_back(std::make_pair(Reg, isKill)); | |
980 | } | |
981 | ||
982 | if (Regs.empty()) | |
983 | continue; | |
984 | if (Regs.size() > 1 || StrOpc== 0) { | |
985 | MachineInstrBuilder MIB = | |
986 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) | |
987 | .addReg(ARM::SP).setMIFlags(MIFlags)); | |
988 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |
989 | MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); | |
990 | } else if (Regs.size() == 1) { | |
991 | MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), | |
992 | ARM::SP) | |
993 | .addReg(Regs[0].first, getKillRegState(Regs[0].second)) | |
994 | .addReg(ARM::SP).setMIFlags(MIFlags) | |
995 | .addImm(-4); | |
996 | AddDefaultPred(MIB); | |
997 | } | |
998 | Regs.clear(); | |
1a4d82fc JJ |
999 | |
1000 | // Put any subsequent vpush instructions before this one: they will refer to | |
1001 | // higher register numbers so need to be pushed first in order to preserve | |
1002 | // monotonicity. | |
1003 | --MI; | |
223e47cc LB |
1004 | } |
1005 | } | |
1006 | ||
1007 | void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, | |
1008 | MachineBasicBlock::iterator MI, | |
1009 | const std::vector<CalleeSavedInfo> &CSI, | |
1010 | unsigned LdmOpc, unsigned LdrOpc, | |
1011 | bool isVarArg, bool NoGap, | |
1012 | bool(*Func)(unsigned, bool), | |
1013 | unsigned NumAlignedDPRCS2Regs) const { | |
1014 | MachineFunction &MF = *MBB.getParent(); | |
1a4d82fc | 1015 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
223e47cc LB |
1016 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1017 | DebugLoc DL = MI->getDebugLoc(); | |
1018 | unsigned RetOpcode = MI->getOpcode(); | |
1019 | bool isTailCall = (RetOpcode == ARM::TCRETURNdi || | |
1020 | RetOpcode == ARM::TCRETURNri); | |
1a4d82fc JJ |
1021 | bool isInterrupt = |
1022 | RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; | |
223e47cc LB |
1023 | |
1024 | SmallVector<unsigned, 4> Regs; | |
1025 | unsigned i = CSI.size(); | |
1026 | while (i != 0) { | |
1027 | unsigned LastReg = 0; | |
1028 | bool DeleteRet = false; | |
1029 | for (; i != 0; --i) { | |
1030 | unsigned Reg = CSI[i-1].getReg(); | |
1a4d82fc | 1031 | if (!(Func)(Reg, STI.isTargetDarwin())) continue; |
223e47cc LB |
1032 | |
1033 | // The aligned reloads from area DPRCS2 are not inserted here. | |
1034 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |
1035 | continue; | |
1036 | ||
1a4d82fc JJ |
1037 | if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && |
1038 | STI.hasV5TOps()) { | |
223e47cc LB |
1039 | Reg = ARM::PC; |
1040 | LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; | |
1041 | // Fold the return instruction into the LDM. | |
1042 | DeleteRet = true; | |
1043 | } | |
1044 | ||
1045 | // If NoGap is true, pop consecutive registers and then leave the rest | |
1046 | // for other instructions. e.g. | |
1047 | // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} | |
1048 | if (NoGap && LastReg && LastReg != Reg-1) | |
1049 | break; | |
1050 | ||
1051 | LastReg = Reg; | |
1052 | Regs.push_back(Reg); | |
1053 | } | |
1054 | ||
1055 | if (Regs.empty()) | |
1056 | continue; | |
1057 | if (Regs.size() > 1 || LdrOpc == 0) { | |
1058 | MachineInstrBuilder MIB = | |
1059 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) | |
1060 | .addReg(ARM::SP)); | |
1061 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |
1062 | MIB.addReg(Regs[i], getDefRegState(true)); | |
1063 | if (DeleteRet) { | |
970d7e83 | 1064 | MIB.copyImplicitOps(&*MI); |
223e47cc LB |
1065 | MI->eraseFromParent(); |
1066 | } | |
1067 | MI = MIB; | |
1068 | } else if (Regs.size() == 1) { | |
1069 | // If we adjusted the reg to PC from LR above, switch it back here. We | |
1070 | // only do that for LDM. | |
1071 | if (Regs[0] == ARM::PC) | |
1072 | Regs[0] = ARM::LR; | |
1073 | MachineInstrBuilder MIB = | |
1074 | BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) | |
1075 | .addReg(ARM::SP, RegState::Define) | |
1076 | .addReg(ARM::SP); | |
1077 | // ARM mode needs an extra reg0 here due to addrmode2. Will go away once | |
1078 | // that refactoring is complete (eventually). | |
1079 | if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { | |
1080 | MIB.addReg(0); | |
1081 | MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); | |
1082 | } else | |
1083 | MIB.addImm(4); | |
1084 | AddDefaultPred(MIB); | |
1085 | } | |
1086 | Regs.clear(); | |
1a4d82fc JJ |
1087 | |
1088 | // Put any subsequent vpop instructions after this one: they will refer to | |
1089 | // higher register numbers so need to be popped afterwards. | |
1090 | ++MI; | |
223e47cc LB |
1091 | } |
1092 | } | |
1093 | ||
1094 | /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers | |
1095 | /// starting from d8. Also insert stack realignment code and leave the stack | |
1096 | /// pointer pointing to the d8 spill slot. | |
1097 | static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, | |
1098 | MachineBasicBlock::iterator MI, | |
1099 | unsigned NumAlignedDPRCS2Regs, | |
1100 | const std::vector<CalleeSavedInfo> &CSI, | |
1101 | const TargetRegisterInfo *TRI) { | |
1102 | MachineFunction &MF = *MBB.getParent(); | |
1103 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1104 | DebugLoc DL = MI->getDebugLoc(); | |
1a4d82fc | 1105 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
223e47cc LB |
1106 | MachineFrameInfo &MFI = *MF.getFrameInfo(); |
1107 | ||
1108 | // Mark the D-register spill slots as properly aligned. Since MFI computes | |
1109 | // stack slot layout backwards, this can actually mean that the d-reg stack | |
1110 | // slot offsets can be wrong. The offset for d8 will always be correct. | |
1111 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |
1112 | unsigned DNum = CSI[i].getReg() - ARM::D8; | |
1113 | if (DNum >= 8) | |
1114 | continue; | |
1115 | int FI = CSI[i].getFrameIdx(); | |
1116 | // The even-numbered registers will be 16-byte aligned, the odd-numbered | |
1117 | // registers will be 8-byte aligned. | |
1118 | MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16); | |
1119 | ||
1120 | // The stack slot for D8 needs to be maximally aligned because this is | |
1121 | // actually the point where we align the stack pointer. MachineFrameInfo | |
1122 | // computes all offsets relative to the incoming stack pointer which is a | |
1123 | // bit weird when realigning the stack. Any extra padding for this | |
1124 | // over-alignment is not realized because the code inserted below adjusts | |
1125 | // the stack pointer by numregs * 8 before aligning the stack pointer. | |
1126 | if (DNum == 0) | |
1127 | MFI.setObjectAlignment(FI, MFI.getMaxAlignment()); | |
1128 | } | |
1129 | ||
1130 | // Move the stack pointer to the d8 spill slot, and align it at the same | |
1131 | // time. Leave the stack slot address in the scratch register r4. | |
1132 | // | |
1133 | // sub r4, sp, #numregs * 8 | |
1134 | // bic r4, r4, #align - 1 | |
1135 | // mov sp, r4 | |
1136 | // | |
1137 | bool isThumb = AFI->isThumbFunction(); | |
1138 | assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); | |
1139 | AFI->setShouldRestoreSPFromFP(true); | |
1140 | ||
1141 | // sub r4, sp, #numregs * 8 | |
1142 | // The immediate is <= 64, so it doesn't need any special encoding. | |
1143 | unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; | |
1144 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) | |
85aaf69f SL |
1145 | .addReg(ARM::SP) |
1146 | .addImm(8 * NumAlignedDPRCS2Regs))); | |
223e47cc | 1147 | |
223e47cc | 1148 | unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); |
85aaf69f SL |
1149 | // We must set parameter MustBeSingleInstruction to true, since |
1150 | // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform | |
1151 | // stack alignment. Luckily, this can always be done since all ARM | |
1152 | // architecture versions that support Neon also support the BFC | |
1153 | // instruction. | |
1154 | emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); | |
223e47cc LB |
1155 | |
1156 | // mov sp, r4 | |
1157 | // The stack pointer must be adjusted before spilling anything, otherwise | |
1158 | // the stack slots could be clobbered by an interrupt handler. | |
1159 | // Leave r4 live, it is used below. | |
1160 | Opc = isThumb ? ARM::tMOVr : ARM::MOVr; | |
1161 | MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) | |
1162 | .addReg(ARM::R4); | |
1163 | MIB = AddDefaultPred(MIB); | |
1164 | if (!isThumb) | |
1165 | AddDefaultCC(MIB); | |
1166 | ||
1167 | // Now spill NumAlignedDPRCS2Regs registers starting from d8. | |
1168 | // r4 holds the stack slot address. | |
1169 | unsigned NextReg = ARM::D8; | |
1170 | ||
1171 | // 16-byte aligned vst1.64 with 4 d-regs and address writeback. | |
1172 | // The writeback is only needed when emitting two vst1.64 instructions. | |
1173 | if (NumAlignedDPRCS2Regs >= 6) { | |
1174 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1175 | &ARM::QQPRRegClass); | |
1176 | MBB.addLiveIn(SupReg); | |
1177 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), | |
1178 | ARM::R4) | |
1179 | .addReg(ARM::R4, RegState::Kill).addImm(16) | |
1180 | .addReg(NextReg) | |
1181 | .addReg(SupReg, RegState::ImplicitKill)); | |
1182 | NextReg += 4; | |
1183 | NumAlignedDPRCS2Regs -= 4; | |
1184 | } | |
1185 | ||
1186 | // We won't modify r4 beyond this point. It currently points to the next | |
1187 | // register to be spilled. | |
1188 | unsigned R4BaseReg = NextReg; | |
1189 | ||
1190 | // 16-byte aligned vst1.64 with 4 d-regs, no writeback. | |
1191 | if (NumAlignedDPRCS2Regs >= 4) { | |
1192 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1193 | &ARM::QQPRRegClass); | |
1194 | MBB.addLiveIn(SupReg); | |
1195 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) | |
1196 | .addReg(ARM::R4).addImm(16).addReg(NextReg) | |
1197 | .addReg(SupReg, RegState::ImplicitKill)); | |
1198 | NextReg += 4; | |
1199 | NumAlignedDPRCS2Regs -= 4; | |
1200 | } | |
1201 | ||
1202 | // 16-byte aligned vst1.64 with 2 d-regs. | |
1203 | if (NumAlignedDPRCS2Regs >= 2) { | |
1204 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1205 | &ARM::QPRRegClass); | |
1206 | MBB.addLiveIn(SupReg); | |
1207 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) | |
1208 | .addReg(ARM::R4).addImm(16).addReg(SupReg)); | |
1209 | NextReg += 2; | |
1210 | NumAlignedDPRCS2Regs -= 2; | |
1211 | } | |
1212 | ||
1213 | // Finally, use a vanilla vstr.64 for the odd last register. | |
1214 | if (NumAlignedDPRCS2Regs) { | |
1215 | MBB.addLiveIn(NextReg); | |
1216 | // vstr.64 uses addrmode5 which has an offset scale of 4. | |
1217 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) | |
1218 | .addReg(NextReg) | |
1219 | .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); | |
1220 | } | |
1221 | ||
1222 | // The last spill instruction inserted should kill the scratch register r4. | |
1a4d82fc | 1223 | std::prev(MI)->addRegisterKilled(ARM::R4, TRI); |
223e47cc LB |
1224 | } |
1225 | ||
1226 | /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an | |
1227 | /// iterator to the following instruction. | |
1228 | static MachineBasicBlock::iterator | |
1229 | skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, | |
1230 | unsigned NumAlignedDPRCS2Regs) { | |
1231 | // sub r4, sp, #numregs * 8 | |
1232 | // bic r4, r4, #align - 1 | |
1233 | // mov sp, r4 | |
1234 | ++MI; ++MI; ++MI; | |
1235 | assert(MI->mayStore() && "Expecting spill instruction"); | |
1236 | ||
1237 | // These switches all fall through. | |
1238 | switch(NumAlignedDPRCS2Regs) { | |
1239 | case 7: | |
1240 | ++MI; | |
1241 | assert(MI->mayStore() && "Expecting spill instruction"); | |
1242 | default: | |
1243 | ++MI; | |
1244 | assert(MI->mayStore() && "Expecting spill instruction"); | |
1245 | case 1: | |
1246 | case 2: | |
1247 | case 4: | |
1248 | assert(MI->killsRegister(ARM::R4) && "Missed kill flag"); | |
1249 | ++MI; | |
1250 | } | |
1251 | return MI; | |
1252 | } | |
1253 | ||
1254 | /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers | |
1255 | /// starting from d8. These instructions are assumed to execute while the | |
1256 | /// stack is still aligned, unlike the code inserted by emitPopInst. | |
1257 | static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, | |
1258 | MachineBasicBlock::iterator MI, | |
1259 | unsigned NumAlignedDPRCS2Regs, | |
1260 | const std::vector<CalleeSavedInfo> &CSI, | |
1261 | const TargetRegisterInfo *TRI) { | |
1262 | MachineFunction &MF = *MBB.getParent(); | |
1263 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1264 | DebugLoc DL = MI->getDebugLoc(); | |
1a4d82fc | 1265 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
223e47cc LB |
1266 | |
1267 | // Find the frame index assigned to d8. | |
1268 | int D8SpillFI = 0; | |
1269 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) | |
1270 | if (CSI[i].getReg() == ARM::D8) { | |
1271 | D8SpillFI = CSI[i].getFrameIdx(); | |
1272 | break; | |
1273 | } | |
1274 | ||
1275 | // Materialize the address of the d8 spill slot into the scratch register r4. | |
1276 | // This can be fairly complicated if the stack frame is large, so just use | |
1277 | // the normal frame index elimination mechanism to do it. This code runs as | |
1278 | // the initial part of the epilog where the stack and base pointers haven't | |
1279 | // been changed yet. | |
1280 | bool isThumb = AFI->isThumbFunction(); | |
1281 | assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); | |
1282 | ||
1283 | unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; | |
1284 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) | |
1285 | .addFrameIndex(D8SpillFI).addImm(0))); | |
1286 | ||
1287 | // Now restore NumAlignedDPRCS2Regs registers starting from d8. | |
1288 | unsigned NextReg = ARM::D8; | |
1289 | ||
1290 | // 16-byte aligned vld1.64 with 4 d-regs and writeback. | |
1291 | if (NumAlignedDPRCS2Regs >= 6) { | |
1292 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1293 | &ARM::QQPRRegClass); | |
1294 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) | |
1295 | .addReg(ARM::R4, RegState::Define) | |
1296 | .addReg(ARM::R4, RegState::Kill).addImm(16) | |
1297 | .addReg(SupReg, RegState::ImplicitDefine)); | |
1298 | NextReg += 4; | |
1299 | NumAlignedDPRCS2Regs -= 4; | |
1300 | } | |
1301 | ||
1302 | // We won't modify r4 beyond this point. It currently points to the next | |
1303 | // register to be spilled. | |
1304 | unsigned R4BaseReg = NextReg; | |
1305 | ||
1306 | // 16-byte aligned vld1.64 with 4 d-regs, no writeback. | |
1307 | if (NumAlignedDPRCS2Regs >= 4) { | |
1308 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1309 | &ARM::QQPRRegClass); | |
1310 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) | |
1311 | .addReg(ARM::R4).addImm(16) | |
1312 | .addReg(SupReg, RegState::ImplicitDefine)); | |
1313 | NextReg += 4; | |
1314 | NumAlignedDPRCS2Regs -= 4; | |
1315 | } | |
1316 | ||
1317 | // 16-byte aligned vld1.64 with 2 d-regs. | |
1318 | if (NumAlignedDPRCS2Regs >= 2) { | |
1319 | unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, | |
1320 | &ARM::QPRRegClass); | |
1321 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) | |
1322 | .addReg(ARM::R4).addImm(16)); | |
1323 | NextReg += 2; | |
1324 | NumAlignedDPRCS2Regs -= 2; | |
1325 | } | |
1326 | ||
1327 | // Finally, use a vanilla vldr.64 for the remaining odd register. | |
1328 | if (NumAlignedDPRCS2Regs) | |
1329 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) | |
1330 | .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); | |
1331 | ||
1332 | // Last store kills r4. | |
1a4d82fc | 1333 | std::prev(MI)->addRegisterKilled(ARM::R4, TRI); |
223e47cc LB |
1334 | } |
1335 | ||
1336 | bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, | |
1337 | MachineBasicBlock::iterator MI, | |
1338 | const std::vector<CalleeSavedInfo> &CSI, | |
1339 | const TargetRegisterInfo *TRI) const { | |
1340 | if (CSI.empty()) | |
1341 | return false; | |
1342 | ||
1343 | MachineFunction &MF = *MBB.getParent(); | |
1344 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1345 | ||
1346 | unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; | |
1347 | unsigned PushOneOpc = AFI->isThumbFunction() ? | |
1348 | ARM::t2STR_PRE : ARM::STR_PRE_IMM; | |
1349 | unsigned FltOpc = ARM::VSTMDDB_UPD; | |
1350 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); | |
1351 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, | |
1352 | MachineInstr::FrameSetup); | |
1353 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, | |
1354 | MachineInstr::FrameSetup); | |
1355 | emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, | |
1356 | NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); | |
1357 | ||
1358 | // The code above does not insert spill code for the aligned DPRCS2 registers. | |
1359 | // The stack realignment code will be inserted between the push instructions | |
1360 | // and these spills. | |
1361 | if (NumAlignedDPRCS2Regs) | |
1362 | emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |
1363 | ||
1364 | return true; | |
1365 | } | |
1366 | ||
1367 | bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, | |
1368 | MachineBasicBlock::iterator MI, | |
1369 | const std::vector<CalleeSavedInfo> &CSI, | |
1370 | const TargetRegisterInfo *TRI) const { | |
1371 | if (CSI.empty()) | |
1372 | return false; | |
1373 | ||
1374 | MachineFunction &MF = *MBB.getParent(); | |
1375 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1a4d82fc | 1376 | bool isVarArg = AFI->getArgRegsSaveSize() > 0; |
223e47cc LB |
1377 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); |
1378 | ||
1379 | // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 | |
1380 | // registers. Do that here instead. | |
1381 | if (NumAlignedDPRCS2Regs) | |
1382 | emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |
1383 | ||
1384 | unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; | |
1385 | unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; | |
1386 | unsigned FltOpc = ARM::VLDMDIA_UPD; | |
1387 | emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, | |
1388 | NumAlignedDPRCS2Regs); | |
1389 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |
1390 | &isARMArea2Register, 0); | |
1391 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |
1392 | &isARMArea1Register, 0); | |
1393 | ||
1394 | return true; | |
1395 | } | |
1396 | ||
1397 | // FIXME: Make generic? | |
1398 | static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, | |
1399 | const ARMBaseInstrInfo &TII) { | |
1400 | unsigned FnSize = 0; | |
1a4d82fc JJ |
1401 | for (auto &MBB : MF) { |
1402 | for (auto &MI : MBB) | |
1403 | FnSize += TII.GetInstSizeInBytes(&MI); | |
223e47cc LB |
1404 | } |
1405 | return FnSize; | |
1406 | } | |
1407 | ||
223e47cc LB |
1408 | /// estimateRSStackSizeLimit - Look at each instruction that references stack |
1409 | /// frames and return the stack size limit beyond which some of these | |
1410 | /// instructions will require a scratch register during their expansion later. | |
1411 | // FIXME: Move to TII? | |
1412 | static unsigned estimateRSStackSizeLimit(MachineFunction &MF, | |
1413 | const TargetFrameLowering *TFI) { | |
1414 | const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1415 | unsigned Limit = (1 << 12) - 1; | |
1a4d82fc JJ |
1416 | for (auto &MBB : MF) { |
1417 | for (auto &MI : MBB) { | |
1418 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |
1419 | if (!MI.getOperand(i).isFI()) | |
1420 | continue; | |
223e47cc LB |
1421 | |
1422 | // When using ADDri to get the address of a stack object, 255 is the | |
1423 | // largest offset guaranteed to fit in the immediate offset. | |
1a4d82fc | 1424 | if (MI.getOpcode() == ARM::ADDri) { |
223e47cc LB |
1425 | Limit = std::min(Limit, (1U << 8) - 1); |
1426 | break; | |
1427 | } | |
1428 | ||
1429 | // Otherwise check the addressing mode. | |
1a4d82fc | 1430 | switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { |
223e47cc LB |
1431 | case ARMII::AddrMode3: |
1432 | case ARMII::AddrModeT2_i8: | |
1433 | Limit = std::min(Limit, (1U << 8) - 1); | |
1434 | break; | |
1435 | case ARMII::AddrMode5: | |
1436 | case ARMII::AddrModeT2_i8s4: | |
1437 | Limit = std::min(Limit, ((1U << 8) - 1) * 4); | |
1438 | break; | |
1439 | case ARMII::AddrModeT2_i12: | |
1440 | // i12 supports only positive offset so these will be converted to | |
1441 | // i8 opcodes. See llvm::rewriteT2FrameIndex. | |
1442 | if (TFI->hasFP(MF) && AFI->hasStackFrame()) | |
1443 | Limit = std::min(Limit, (1U << 8) - 1); | |
1444 | break; | |
1445 | case ARMII::AddrMode4: | |
1446 | case ARMII::AddrMode6: | |
1447 | // Addressing modes 4 & 6 (load/store) instructions can't encode an | |
1448 | // immediate offset for stack references. | |
1449 | return 0; | |
1450 | default: | |
1451 | break; | |
1452 | } | |
1453 | break; // At most one FI per instruction | |
1454 | } | |
1455 | } | |
1456 | } | |
1457 | ||
1458 | return Limit; | |
1459 | } | |
1460 | ||
1461 | // In functions that realign the stack, it can be an advantage to spill the | |
1462 | // callee-saved vector registers after realigning the stack. The vst1 and vld1 | |
1463 | // instructions take alignment hints that can improve performance. | |
1464 | // | |
1465 | static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { | |
1466 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); | |
1467 | if (!SpillAlignedNEONRegs) | |
1468 | return; | |
1469 | ||
1470 | // Naked functions don't spill callee-saved registers. | |
970d7e83 LB |
1471 | if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, |
1472 | Attribute::Naked)) | |
223e47cc LB |
1473 | return; |
1474 | ||
1475 | // We are planning to use NEON instructions vst1 / vld1. | |
1476 | if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON()) | |
1477 | return; | |
1478 | ||
1479 | // Don't bother if the default stack alignment is sufficiently high. | |
1a4d82fc JJ |
1480 | if (MF.getTarget() |
1481 | .getSubtargetImpl() | |
1482 | ->getFrameLowering() | |
1483 | ->getStackAlignment() >= 8) | |
223e47cc LB |
1484 | return; |
1485 | ||
1486 | // Aligned spills require stack realignment. | |
1a4d82fc JJ |
1487 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1488 | MF.getSubtarget().getRegisterInfo()); | |
223e47cc LB |
1489 | if (!RegInfo->canRealignStack(MF)) |
1490 | return; | |
1491 | ||
1492 | // We always spill contiguous d-registers starting from d8. Count how many | |
1493 | // needs spilling. The register allocator will almost always use the | |
1494 | // callee-saved registers in order, but it can happen that there are holes in | |
1495 | // the range. Registers above the hole will be spilled to the standard DPRCS | |
1496 | // area. | |
1497 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |
1498 | unsigned NumSpills = 0; | |
1499 | for (; NumSpills < 8; ++NumSpills) | |
970d7e83 | 1500 | if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) |
223e47cc LB |
1501 | break; |
1502 | ||
1503 | // Don't do this for just one d-register. It's not worth it. | |
1504 | if (NumSpills < 2) | |
1505 | return; | |
1506 | ||
1507 | // Spill the first NumSpills D-registers after realigning the stack. | |
1508 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); | |
1509 | ||
1510 | // A scratch register is required for the vst1 / vld1 instructions. | |
1511 | MF.getRegInfo().setPhysRegUsed(ARM::R4); | |
1512 | } | |
1513 | ||
/// processFunctionBeforeCalleeSavedScan - Decide, before PEI scans for
/// callee-saved registers, which extra registers must be marked used (and
/// hence spilled): the frame pointer, LR, the base pointer, scratch registers
/// for large stacks / stack realignment, and padding registers to keep the
/// GPR save area 8-byte aligned. May also reserve a scavenging frame index.
void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage the eliminateFrameIndex machinery. This also ensures it
  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
  // to combine multiple loads / stores.
  bool CanEliminateFrame = true;
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    MRI.setPhysRegUsed(ARM::R4);

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      MRI.setPhysRegUsed(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
    // for sure what the stack size will be, but for this, an estimate is good
    // enough. If there anything changes it, it'll be a spill, which implies
    // we've used all the registers and so R4 is already used, so not marking
    // it here will be OK.
    // FIXME: It will be better just to find spare register here.
    // 508 is the limit reachable by a Thumb1 SP-relative adjustment below.
    unsigned StackSize = MFI->estimateStackSize(MF);
    if (MFI->hasVarSizedObjects() || StackSize > 508)
      MRI.setPhysRegUsed(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MRI.setPhysRegUsed(RegInfo->getBaseRegister());

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is used.
  // Classify each callee-saved GPR into area 1 (r0-r7, LR) or area 2 (the
  // rest); on non-Darwin targets every GPR callee-save is treated as area 1.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (MRI.isPhysRegUsed(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg))
      continue;

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.isTargetDarwin()) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        // Fallthrough
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.isTargetDarwin()) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump. If it turns out that it's not needed
    // then the branch fix up path will undo it.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  bool BigStack =
    (RS &&
     (MFI->estimateStackSize(MF) +
      ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
      estimateRSStackSizeLimit(MF, this)))
    || MFI->hasVarSizedObjects()
    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));

  bool ExtraCSSpill = false;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled) {
      MRI.setPhysRegUsed(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
                        (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      ExtraCSSpill = true;
    }

    if (hasFP(MF)) {
      // FP is now a spilled callee-save; remove it from the "unspilled" pool.
      MRI.setPhysRegUsed(FramePtr);
      auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
                             FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    unsigned TargetAlign = getStackAlignment();
    if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
          unsigned Reg = UnspilledCS1GPRs[i];
          // Don't spill high register if the function is thumb1
          if (!AFI->isThumb1OnlyFunction() ||
              isARMLowRegister(Reg) || Reg == ARM::LR) {
            MRI.setPhysRegUsed(Reg);
            // A non-reserved extra spill doubles as a scavenger candidate.
            if (!MRI.isReserved(Reg))
              ExtraCSSpill = true;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        MRI.setPhysRegUsed(Reg);
        if (!MRI.isReserved(Reg))
          ExtraCSSpill = true;
      }
    }

    // Estimate if we might need to scavenge a register at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments also, even when the frame itself is small.
    if (BigStack && !ExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.back();
        UnspilledCS1GPRs.pop_back();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
             Reg == ARM::LR)) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.back();
          UnspilledCS2GPRs.pop_back();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      // Only use the extra registers if we found a full complement; otherwise
      // fall back to a dedicated scavenging slot (non-Thumb1 only).
      if (Extras.size() && NumExtras == 0) {
        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
          MRI.setPhysRegUsed(Extras[i]);
        }
      } else if (!AFI->isThumb1OnlyFunction()) {
        // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
        // closest to SP or frame pointer.
        const TargetRegisterClass *RC = &ARM::GPRRegClass;
        RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                           RC->getAlignment(),
                                                           false));
      }
    }
  }

  if (ForceLRSpill) {
    MRI.setPhysRegUsed(ARM::LR);
    AFI->setLRIsSpilledForFarJump(true);
  }
}
970d7e83 LB |
1765 | |
1766 | ||
1767 | void ARMFrameLowering:: | |
1768 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, | |
1769 | MachineBasicBlock::iterator I) const { | |
1770 | const ARMBaseInstrInfo &TII = | |
1a4d82fc | 1771 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
970d7e83 LB |
1772 | if (!hasReservedCallFrame(MF)) { |
1773 | // If we have alloca, convert as follows: | |
1774 | // ADJCALLSTACKDOWN -> sub, sp, sp, amount | |
1775 | // ADJCALLSTACKUP -> add, sp, sp, amount | |
1776 | MachineInstr *Old = I; | |
1777 | DebugLoc dl = Old->getDebugLoc(); | |
1778 | unsigned Amount = Old->getOperand(0).getImm(); | |
1779 | if (Amount != 0) { | |
1780 | // We need to keep the stack aligned properly. To do this, we round the | |
1781 | // amount of space needed for the outgoing arguments up to the next | |
1782 | // alignment boundary. | |
1783 | unsigned Align = getStackAlignment(); | |
1784 | Amount = (Amount+Align-1)/Align*Align; | |
1785 | ||
1786 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1787 | assert(!AFI->isThumb1OnlyFunction() && | |
1788 | "This eliminateCallFramePseudoInstr does not support Thumb1!"); | |
1789 | bool isARM = !AFI->isThumbFunction(); | |
1790 | ||
1791 | // Replace the pseudo instruction with a new instruction... | |
1792 | unsigned Opc = Old->getOpcode(); | |
1793 | int PIdx = Old->findFirstPredOperandIdx(); | |
1794 | ARMCC::CondCodes Pred = (PIdx == -1) | |
1795 | ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); | |
1796 | if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { | |
1797 | // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. | |
1798 | unsigned PredReg = Old->getOperand(2).getReg(); | |
1799 | emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, | |
1800 | Pred, PredReg); | |
1801 | } else { | |
1802 | // Note: PredReg is operand 3 for ADJCALLSTACKUP. | |
1803 | unsigned PredReg = Old->getOperand(3).getReg(); | |
1804 | assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); | |
1805 | emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, | |
1806 | Pred, PredReg); | |
1807 | } | |
1808 | } | |
1809 | } | |
1810 | MBB.erase(I); | |
1811 | } | |
1812 | ||
1a4d82fc JJ |
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// NOTE(review): for inputs so large that no encodable constant >= Value
/// exists, the final left shift can wrap and the result underestimates the
/// request; matches the historical behavior — confirm callers never pass
/// stack sizes near 2^32.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left by 2 until one of the top two bits is set, tracking
  // how far we moved so it can be undone below.
  unsigned ShiftAmt = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    ShiftAmt += 2;
  }

  // Keep the top 8 bits; round up if any of the discarded low bits were set.
  bool RoundUp = (Value & 0x00FFFFFF) != 0;
  Value = ((Value & 0xFF000000) >> 24) + (RoundUp ? 1 : 0);

  // If rounding carried into bit 8, drop the bits that can no longer be
  // represented by the 8-bit payload.
  if (Value & 0x0000100)
    Value &= 0x000001FC;

  // Undo the normalization shift to place the payload back in position.
  if (ShiftAmt > 24)
    Value >>= (ShiftAmt - 24);
  else
    Value <<= (24 - ShiftAmt);

  return Value;
}
1841 | ||
1a4d82fc | 1842 | // The stack limit in the TCB is set to this many bytes above the actual |
970d7e83 LB |
1843 | // stack limit. |
1844 | static const uint64_t kSplitStackAvailable = 256; | |
1845 | ||
1a4d82fc JJ |
1846 | // Adjust the function prologue to enable split stacks. This currently only |
1847 | // supports android and linux. | |
1848 | // | |
1849 | // The ABI of the segmented stack prologue is a little arbitrarily chosen, but | |
1850 | // must be well defined in order to allow for consistent implementations of the | |
1851 | // __morestack helper function. The ABI is also not a normal ABI in that it | |
1852 | // doesn't follow the normal calling conventions because this allows the | |
1853 | // prologue of each function to be optimized further. | |
1854 | // | |
1855 | // Currently, the ABI looks like (when calling __morestack) | |
1856 | // | |
1857 | // * r4 holds the minimum stack size requested for this function call | |
1858 | // * r5 holds the stack size of the arguments to the function | |
1859 | // * the beginning of the function is 3 instructions after the call to | |
1860 | // __morestack | |
1861 | // | |
1862 | // Implementations of __morestack should use r4 to allocate a new stack, r5 to | |
1863 | // place the arguments on to the new stack, and the 3-instruction knowledge to | |
1864 | // jump directly to the body of the function when working on the new stack. | |
1865 | // | |
1866 | // An old (and possibly no longer compatible) implementation of __morestack for | |
1867 | // ARM can be found at [1]. | |
1868 | // | |
1869 | // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S | |
1870 | void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { | |
1871 | unsigned Opcode; | |
1872 | unsigned CFIIndex; | |
970d7e83 | 1873 | const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>(); |
1a4d82fc | 1874 | bool Thumb = ST->isThumb(); |
970d7e83 | 1875 | |
1a4d82fc JJ |
1876 | // Sadly, this currently doesn't support varargs, platforms other than |
1877 | // android/linux. Note that thumb1/thumb2 are support for android/linux. | |
970d7e83 LB |
1878 | if (MF.getFunction()->isVarArg()) |
1879 | report_fatal_error("Segmented stacks do not support vararg functions."); | |
1a4d82fc JJ |
1880 | if (!ST->isTargetAndroid() && !ST->isTargetLinux()) |
1881 | report_fatal_error("Segmented stacks not supported on this platform."); | |
1882 | ||
970d7e83 | 1883 | MachineBasicBlock &prologueMBB = MF.front(); |
1a4d82fc JJ |
1884 | MachineFrameInfo *MFI = MF.getFrameInfo(); |
1885 | MachineModuleInfo &MMI = MF.getMMI(); | |
1886 | MCContext &Context = MMI.getContext(); | |
1887 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |
1888 | const ARMBaseInstrInfo &TII = | |
1889 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |
1890 | ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); | |
970d7e83 LB |
1891 | DebugLoc DL; |
1892 | ||
1a4d82fc JJ |
1893 | uint64_t StackSize = MFI->getStackSize(); |
1894 | ||
1895 | // Do not generate a prologue for functions with a stack of size zero | |
1896 | if (StackSize == 0) | |
1897 | return; | |
1898 | ||
1899 | // Use R4 and R5 as scratch registers. | |
1900 | // We save R4 and R5 before use and restore them before leaving the function. | |
970d7e83 LB |
1901 | unsigned ScratchReg0 = ARM::R4; |
1902 | unsigned ScratchReg1 = ARM::R5; | |
970d7e83 LB |
1903 | uint64_t AlignedStackSize; |
1904 | ||
1a4d82fc JJ |
1905 | MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock(); |
1906 | MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock(); | |
1907 | MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock(); | |
1908 | MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); | |
1909 | MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); | |
970d7e83 LB |
1910 | |
1911 | for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), | |
1a4d82fc JJ |
1912 | e = prologueMBB.livein_end(); |
1913 | i != e; ++i) { | |
1914 | AllocMBB->addLiveIn(*i); | |
1915 | GetMBB->addLiveIn(*i); | |
1916 | McrMBB->addLiveIn(*i); | |
1917 | PrevStackMBB->addLiveIn(*i); | |
1918 | PostStackMBB->addLiveIn(*i); | |
970d7e83 LB |
1919 | } |
1920 | ||
1a4d82fc JJ |
1921 | MF.push_front(PostStackMBB); |
1922 | MF.push_front(AllocMBB); | |
1923 | MF.push_front(GetMBB); | |
1924 | MF.push_front(McrMBB); | |
1925 | MF.push_front(PrevStackMBB); | |
970d7e83 | 1926 | |
1a4d82fc JJ |
1927 | // The required stack size that is aligned to ARM constant criterion. |
1928 | AlignedStackSize = alignToARMConstant(StackSize); | |
970d7e83 LB |
1929 | |
1930 | // When the frame size is less than 256 we just compare the stack | |
1931 | // boundary directly to the value of the stack pointer, per gcc. | |
1932 | bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable; | |
1933 | ||
1a4d82fc JJ |
1934 | // We will use two of the callee save registers as scratch registers so we |
1935 | // need to save those registers onto the stack. | |
1936 | // We will use SR0 to hold stack limit and SR1 to hold the stack size | |
1937 | // requested and arguments for __morestack(). | |
970d7e83 LB |
1938 | // SR0: Scratch Register #0 |
1939 | // SR1: Scratch Register #1 | |
1940 | // push {SR0, SR1} | |
1a4d82fc JJ |
1941 | if (Thumb) { |
1942 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) | |
1943 | .addReg(ScratchReg0).addReg(ScratchReg1); | |
970d7e83 | 1944 | } else { |
1a4d82fc JJ |
1945 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) |
1946 | .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) | |
1947 | .addReg(ScratchReg0).addReg(ScratchReg1); | |
1948 | } | |
1949 | ||
1950 | // Emit the relevant DWARF information about the change in stack pointer as | |
1951 | // well as where to find both r4 and r5 (the callee-save registers) | |
1952 | CFIIndex = | |
1953 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); | |
1954 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
1955 | .addCFIIndex(CFIIndex); | |
1956 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |
1957 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); | |
1958 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
1959 | .addCFIIndex(CFIIndex); | |
1960 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |
1961 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); | |
1962 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
1963 | .addCFIIndex(CFIIndex); | |
1964 | ||
1965 | // mov SR1, sp | |
1966 | if (Thumb) { | |
1967 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) | |
1968 | .addReg(ARM::SP)); | |
1969 | } else if (CompareStackPointer) { | |
1970 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) | |
1971 | .addReg(ARM::SP)).addReg(0); | |
1972 | } | |
1973 | ||
1974 | // sub SR1, sp, #StackSize | |
1975 | if (!CompareStackPointer && Thumb) { | |
1976 | AddDefaultPred( | |
1977 | AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) | |
1978 | .addReg(ScratchReg1).addImm(AlignedStackSize)); | |
1979 | } else if (!CompareStackPointer) { | |
1980 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) | |
1981 | .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); | |
1982 | } | |
1983 | ||
1984 | if (Thumb && ST->isThumb1Only()) { | |
1985 | unsigned PCLabelId = ARMFI->createPICLabelUId(); | |
1986 | ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( | |
1987 | MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); | |
1988 | MachineConstantPool *MCP = MF.getConstantPool(); | |
1989 | unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment()); | |
1990 | ||
1991 | // ldr SR0, [pc, offset(STACK_LIMIT)] | |
1992 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) | |
1993 | .addConstantPoolIndex(CPI)); | |
1994 | ||
1995 | // ldr SR0, [SR0] | |
1996 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) | |
1997 | .addReg(ScratchReg0).addImm(0)); | |
1998 | } else { | |
1999 | // Get TLS base address from the coprocessor | |
2000 | // mrc p15, #0, SR0, c13, c0, #3 | |
2001 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) | |
2002 | .addImm(15) | |
2003 | .addImm(0) | |
2004 | .addImm(13) | |
2005 | .addImm(0) | |
2006 | .addImm(3)); | |
2007 | ||
2008 | // Use the last tls slot on android and a private field of the TCP on linux. | |
2009 | assert(ST->isTargetAndroid() || ST->isTargetLinux()); | |
2010 | unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1; | |
2011 | ||
2012 | // Get the stack limit from the right offset | |
2013 | // ldr SR0, [sr0, #4 * TlsOffset] | |
2014 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) | |
2015 | .addReg(ScratchReg0).addImm(4 * TlsOffset)); | |
970d7e83 | 2016 | } |
970d7e83 LB |
2017 | |
2018 | // Compare stack limit with stack size requested. | |
2019 | // cmp SR0, SR1 | |
1a4d82fc JJ |
2020 | Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; |
2021 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) | |
2022 | .addReg(ScratchReg0) | |
2023 | .addReg(ScratchReg1)); | |
970d7e83 LB |
2024 | |
2025 | // This jump is taken if StackLimit < SP - stack required. | |
1a4d82fc JJ |
2026 | Opcode = Thumb ? ARM::tBcc : ARM::Bcc; |
2027 | BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB) | |
2028 | .addImm(ARMCC::LO) | |
2029 | .addReg(ARM::CPSR); | |
970d7e83 LB |
2030 | |
2031 | ||
2032 | // Calling __morestack(StackSize, Size of stack arguments). | |
2033 | // __morestack knows that the stack size requested is in SR0(r4) | |
2034 | // and amount size of stack arguments is in SR1(r5). | |
2035 | ||
2036 | // Pass first argument for the __morestack by Scratch Register #0. | |
2037 | // The amount size of stack required | |
1a4d82fc JJ |
2038 | if (Thumb) { |
2039 | AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), | |
2040 | ScratchReg0)).addImm(AlignedStackSize)); | |
2041 | } else { | |
2042 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) | |
2043 | .addImm(AlignedStackSize)).addReg(0); | |
2044 | } | |
970d7e83 LB |
2045 | // Pass second argument for the __morestack by Scratch Register #1. |
2046 | // The amount size of stack consumed to save function arguments. | |
1a4d82fc JJ |
2047 | if (Thumb) { |
2048 | AddDefaultPred( | |
2049 | AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) | |
2050 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); | |
2051 | } else { | |
2052 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) | |
2053 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) | |
2054 | .addReg(0); | |
2055 | } | |
970d7e83 LB |
2056 | |
2057 | // push {lr} - Save return address of this function. | |
1a4d82fc JJ |
2058 | if (Thumb) { |
2059 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) | |
2060 | .addReg(ARM::LR); | |
2061 | } else { | |
2062 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) | |
2063 | .addReg(ARM::SP, RegState::Define) | |
2064 | .addReg(ARM::SP)) | |
2065 | .addReg(ARM::LR); | |
2066 | } | |
2067 | ||
2068 | // Emit the DWARF info about the change in stack as well as where to find the | |
2069 | // previous link register | |
2070 | CFIIndex = | |
2071 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); | |
2072 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2073 | .addCFIIndex(CFIIndex); | |
2074 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |
2075 | nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); | |
2076 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2077 | .addCFIIndex(CFIIndex); | |
970d7e83 LB |
2078 | |
2079 | // Call __morestack(). | |
1a4d82fc JJ |
2080 | if (Thumb) { |
2081 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) | |
2082 | .addExternalSymbol("__morestack"); | |
2083 | } else { | |
2084 | BuildMI(AllocMBB, DL, TII.get(ARM::BL)) | |
2085 | .addExternalSymbol("__morestack"); | |
2086 | } | |
970d7e83 | 2087 | |
1a4d82fc JJ |
2088 | // pop {lr} - Restore return address of this original function. |
2089 | if (Thumb) { | |
2090 | if (ST->isThumb1Only()) { | |
2091 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |
2092 | .addReg(ScratchReg0); | |
2093 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) | |
2094 | .addReg(ScratchReg0)); | |
2095 | } else { | |
2096 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) | |
2097 | .addReg(ARM::LR, RegState::Define) | |
2098 | .addReg(ARM::SP, RegState::Define) | |
2099 | .addReg(ARM::SP) | |
2100 | .addImm(4)); | |
2101 | } | |
2102 | } else { | |
2103 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |
2104 | .addReg(ARM::SP, RegState::Define) | |
2105 | .addReg(ARM::SP)) | |
2106 | .addReg(ARM::LR); | |
2107 | } | |
970d7e83 LB |
2108 | |
2109 | // Restore SR0 and SR1 in case __morestack() was called. | |
1a4d82fc | 2110 | // __morestack() will skip PostStackMBB block so we need to restore |
970d7e83 LB |
2111 | // scratch registers from here. |
2112 | // pop {SR0, SR1} | |
1a4d82fc JJ |
2113 | if (Thumb) { |
2114 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |
2115 | .addReg(ScratchReg0) | |
2116 | .addReg(ScratchReg1); | |
2117 | } else { | |
2118 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |
2119 | .addReg(ARM::SP, RegState::Define) | |
2120 | .addReg(ARM::SP)) | |
2121 | .addReg(ScratchReg0) | |
2122 | .addReg(ScratchReg1); | |
2123 | } | |
2124 | ||
2125 | // Update the CFA offset now that we've popped | |
2126 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |
2127 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2128 | .addCFIIndex(CFIIndex); | |
970d7e83 | 2129 | |
1a4d82fc JJ |
2130 | // bx lr - Return from this function. |
2131 | Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; | |
2132 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); | |
970d7e83 LB |
2133 | |
2134 | // Restore SR0 and SR1 in case __morestack() was not called. | |
2135 | // pop {SR0, SR1} | |
1a4d82fc JJ |
2136 | if (Thumb) { |
2137 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) | |
2138 | .addReg(ScratchReg0) | |
2139 | .addReg(ScratchReg1); | |
2140 | } else { | |
2141 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) | |
2142 | .addReg(ARM::SP, RegState::Define) | |
2143 | .addReg(ARM::SP)) | |
2144 | .addReg(ScratchReg0) | |
2145 | .addReg(ScratchReg1); | |
2146 | } | |
2147 | ||
2148 | // Update the CFA offset now that we've popped | |
2149 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |
2150 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2151 | .addCFIIndex(CFIIndex); | |
2152 | ||
2153 | // Tell debuggers that r4 and r5 are now the same as they were in the | |
2154 | // previous function, that they're the "Same Value". | |
2155 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |
2156 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); | |
2157 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2158 | .addCFIIndex(CFIIndex); | |
2159 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |
2160 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); | |
2161 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |
2162 | .addCFIIndex(CFIIndex); | |
970d7e83 LB |
2163 | |
2164 | // Organizing MBB lists | |
1a4d82fc JJ |
2165 | PostStackMBB->addSuccessor(&prologueMBB); |
2166 | ||
2167 | AllocMBB->addSuccessor(PostStackMBB); | |
2168 | ||
2169 | GetMBB->addSuccessor(PostStackMBB); | |
2170 | GetMBB->addSuccessor(AllocMBB); | |
2171 | ||
2172 | McrMBB->addSuccessor(GetMBB); | |
2173 | ||
2174 | PrevStackMBB->addSuccessor(McrMBB); | |
970d7e83 LB |
2175 | |
2176 | #ifdef XDEBUG | |
2177 | MF.verify(); | |
2178 | #endif | |
2179 | } |