1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the X86 implementation of TargetFrameLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "X86FrameLowering.h"
15 #include "X86InstrBuilder.h"
16 #include "X86InstrInfo.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Target/TargetOptions.h"
32 #include "llvm/Support/Debug.h"
37 // FIXME: completely move here.
38 extern cl::opt
<bool> ForceStackAlign
;
40 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction
&MF
) const {
41 return !MF
.getFrameInfo()->hasVarSizedObjects();
44 /// hasFP - Return true if the specified function should have a dedicated frame
45 /// pointer register. This is true if the function has variable sized allocas
46 /// or if frame pointer elimination is disabled.
47 bool X86FrameLowering::hasFP(const MachineFunction
&MF
) const {
48 const MachineFrameInfo
*MFI
= MF
.getFrameInfo();
49 const MachineModuleInfo
&MMI
= MF
.getMMI();
50 const TargetRegisterInfo
*RegInfo
= MF
.getSubtarget().getRegisterInfo();
52 return (MF
.getTarget().Options
.DisableFramePointerElim(MF
) ||
53 RegInfo
->needsStackRealignment(MF
) ||
54 MFI
->hasVarSizedObjects() ||
55 MFI
->isFrameAddressTaken() || MFI
->hasInlineAsmWithSPAdjust() ||
56 MF
.getInfo
<X86MachineFunctionInfo
>()->getForceFramePointer() ||
57 MMI
.callsUnwindInit() || MMI
.callsEHReturn() ||
58 MFI
->hasStackMap() || MFI
->hasPatchPoint());
61 static unsigned getSUBriOpcode(unsigned IsLP64
, int64_t Imm
) {
65 return X86::SUB64ri32
;
73 static unsigned getADDriOpcode(unsigned IsLP64
, int64_t Imm
) {
77 return X86::ADD64ri32
;
85 static unsigned getANDriOpcode(bool IsLP64
, int64_t Imm
) {
89 return X86::AND64ri32
;
96 static unsigned getPUSHiOpcode(bool IsLP64
, MachineOperand MO
) {
97 // We don't support LP64 for now.
100 if (MO
.isImm() && isInt
<8>(MO
.getImm()))
101 return X86::PUSH32i8
;
103 return X86::PUSHi32
;;
106 static unsigned getLEArOpcode(unsigned IsLP64
) {
107 return IsLP64
? X86::LEA64r
: X86::LEA32r
;
110 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live
111 /// when it reaches the "return" instruction. We can then pop a stack object
112 /// to this register without worry about clobbering it.
113 static unsigned findDeadCallerSavedReg(MachineBasicBlock
&MBB
,
114 MachineBasicBlock::iterator
&MBBI
,
115 const TargetRegisterInfo
&TRI
,
117 const MachineFunction
*MF
= MBB
.getParent();
118 const Function
*F
= MF
->getFunction();
119 if (!F
|| MF
->getMMI().callsEHReturn())
122 static const uint16_t CallerSavedRegs32Bit
[] = {
123 X86::EAX
, X86::EDX
, X86::ECX
, 0
126 static const uint16_t CallerSavedRegs64Bit
[] = {
127 X86::RAX
, X86::RDX
, X86::RCX
, X86::RSI
, X86::RDI
,
128 X86::R8
, X86::R9
, X86::R10
, X86::R11
, 0
131 unsigned Opc
= MBBI
->getOpcode();
138 case X86::TCRETURNdi
:
139 case X86::TCRETURNri
:
140 case X86::TCRETURNmi
:
141 case X86::TCRETURNdi64
:
142 case X86::TCRETURNri64
:
143 case X86::TCRETURNmi64
:
145 case X86::EH_RETURN64
: {
146 SmallSet
<uint16_t, 8> Uses
;
147 for (unsigned i
= 0, e
= MBBI
->getNumOperands(); i
!= e
; ++i
) {
148 MachineOperand
&MO
= MBBI
->getOperand(i
);
149 if (!MO
.isReg() || MO
.isDef())
151 unsigned Reg
= MO
.getReg();
154 for (MCRegAliasIterator
AI(Reg
, &TRI
, true); AI
.isValid(); ++AI
)
158 const uint16_t *CS
= Is64Bit
? CallerSavedRegs64Bit
: CallerSavedRegs32Bit
;
160 if (!Uses
.count(*CS
))
169 /// emitSPUpdate - Emit a series of instructions to increment / decrement the
170 /// stack pointer by a constant value.
172 void emitSPUpdate(MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&MBBI
,
173 unsigned StackPtr
, int64_t NumBytes
,
174 bool Is64BitTarget
, bool Is64BitStackPtr
, bool UseLEA
,
175 const TargetInstrInfo
&TII
, const TargetRegisterInfo
&TRI
) {
176 bool isSub
= NumBytes
< 0;
177 uint64_t Offset
= isSub
? -NumBytes
: NumBytes
;
180 Opc
= getLEArOpcode(Is64BitStackPtr
);
183 ? getSUBriOpcode(Is64BitStackPtr
, Offset
)
184 : getADDriOpcode(Is64BitStackPtr
, Offset
);
186 uint64_t Chunk
= (1LL << 31) - 1;
187 DebugLoc DL
= MBB
.findDebugLoc(MBBI
);
190 uint64_t ThisVal
= (Offset
> Chunk
) ? Chunk
: Offset
;
191 if (ThisVal
== (Is64BitTarget
? 8 : 4)) {
192 // Use push / pop instead.
194 ? (unsigned)(Is64BitTarget
? X86::RAX
: X86::EAX
)
195 : findDeadCallerSavedReg(MBB
, MBBI
, TRI
, Is64BitTarget
);
198 ? (Is64BitTarget
? X86::PUSH64r
: X86::PUSH32r
)
199 : (Is64BitTarget
? X86::POP64r
: X86::POP32r
);
200 MachineInstr
*MI
= BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
))
201 .addReg(Reg
, getDefRegState(!isSub
) | getUndefRegState(isSub
));
203 MI
->setFlag(MachineInstr::FrameSetup
);
209 MachineInstr
*MI
= nullptr;
212 MI
= addRegOffset(BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
), StackPtr
),
213 StackPtr
, false, isSub
? -ThisVal
: ThisVal
);
215 MI
= BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
), StackPtr
)
218 MI
->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
222 MI
->setFlag(MachineInstr::FrameSetup
);
228 /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
230 void mergeSPUpdatesUp(MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&MBBI
,
231 unsigned StackPtr
, uint64_t *NumBytes
= nullptr) {
232 if (MBBI
== MBB
.begin()) return;
234 MachineBasicBlock::iterator PI
= std::prev(MBBI
);
235 unsigned Opc
= PI
->getOpcode();
236 if ((Opc
== X86::ADD64ri32
|| Opc
== X86::ADD64ri8
||
237 Opc
== X86::ADD32ri
|| Opc
== X86::ADD32ri8
||
238 Opc
== X86::LEA32r
|| Opc
== X86::LEA64_32r
) &&
239 PI
->getOperand(0).getReg() == StackPtr
) {
241 *NumBytes
+= PI
->getOperand(2).getImm();
243 } else if ((Opc
== X86::SUB64ri32
|| Opc
== X86::SUB64ri8
||
244 Opc
== X86::SUB32ri
|| Opc
== X86::SUB32ri8
) &&
245 PI
->getOperand(0).getReg() == StackPtr
) {
247 *NumBytes
-= PI
->getOperand(2).getImm();
252 /// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower
255 void mergeSPUpdatesDown(MachineBasicBlock
&MBB
,
256 MachineBasicBlock::iterator
&MBBI
,
257 unsigned StackPtr
, uint64_t *NumBytes
= nullptr) {
258 // FIXME: THIS ISN'T RUN!!!
261 if (MBBI
== MBB
.end()) return;
263 MachineBasicBlock::iterator NI
= std::next(MBBI
);
264 if (NI
== MBB
.end()) return;
266 unsigned Opc
= NI
->getOpcode();
267 if ((Opc
== X86::ADD64ri32
|| Opc
== X86::ADD64ri8
||
268 Opc
== X86::ADD32ri
|| Opc
== X86::ADD32ri8
) &&
269 NI
->getOperand(0).getReg() == StackPtr
) {
271 *NumBytes
-= NI
->getOperand(2).getImm();
274 } else if ((Opc
== X86::SUB64ri32
|| Opc
== X86::SUB64ri8
||
275 Opc
== X86::SUB32ri
|| Opc
== X86::SUB32ri8
) &&
276 NI
->getOperand(0).getReg() == StackPtr
) {
278 *NumBytes
+= NI
->getOperand(2).getImm();
284 /// mergeSPUpdates - Checks the instruction before/after the passed
285 /// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and
286 /// the stack adjustment is returned as a positive value for ADD/LEA and a
287 /// negative for SUB.
288 static int mergeSPUpdates(MachineBasicBlock
&MBB
,
289 MachineBasicBlock::iterator
&MBBI
, unsigned StackPtr
,
290 bool doMergeWithPrevious
) {
291 if ((doMergeWithPrevious
&& MBBI
== MBB
.begin()) ||
292 (!doMergeWithPrevious
&& MBBI
== MBB
.end()))
295 MachineBasicBlock::iterator PI
= doMergeWithPrevious
? std::prev(MBBI
) : MBBI
;
296 MachineBasicBlock::iterator NI
= doMergeWithPrevious
? nullptr
298 unsigned Opc
= PI
->getOpcode();
301 if ((Opc
== X86::ADD64ri32
|| Opc
== X86::ADD64ri8
||
302 Opc
== X86::ADD32ri
|| Opc
== X86::ADD32ri8
||
303 Opc
== X86::LEA32r
|| Opc
== X86::LEA64_32r
) &&
304 PI
->getOperand(0).getReg() == StackPtr
){
305 Offset
+= PI
->getOperand(2).getImm();
307 if (!doMergeWithPrevious
) MBBI
= NI
;
308 } else if ((Opc
== X86::SUB64ri32
|| Opc
== X86::SUB64ri8
||
309 Opc
== X86::SUB32ri
|| Opc
== X86::SUB32ri8
) &&
310 PI
->getOperand(0).getReg() == StackPtr
) {
311 Offset
-= PI
->getOperand(2).getImm();
313 if (!doMergeWithPrevious
) MBBI
= NI
;
319 static bool isEAXLiveIn(MachineFunction
&MF
) {
320 for (MachineRegisterInfo::livein_iterator II
= MF
.getRegInfo().livein_begin(),
321 EE
= MF
.getRegInfo().livein_end(); II
!= EE
; ++II
) {
322 unsigned Reg
= II
->first
;
324 if (Reg
== X86::EAX
|| Reg
== X86::AX
||
325 Reg
== X86::AH
|| Reg
== X86::AL
)
333 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock
&MBB
,
334 MachineBasicBlock::iterator MBBI
,
336 MachineFunction
&MF
= *MBB
.getParent();
337 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
338 MachineModuleInfo
&MMI
= MF
.getMMI();
339 const MCRegisterInfo
*MRI
= MMI
.getContext().getRegisterInfo();
340 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
342 // Add callee saved registers to move list.
343 const std::vector
<CalleeSavedInfo
> &CSI
= MFI
->getCalleeSavedInfo();
344 if (CSI
.empty()) return;
346 // Calculate offsets.
347 for (std::vector
<CalleeSavedInfo
>::const_iterator
348 I
= CSI
.begin(), E
= CSI
.end(); I
!= E
; ++I
) {
349 int64_t Offset
= MFI
->getObjectOffset(I
->getFrameIdx());
350 unsigned Reg
= I
->getReg();
352 unsigned DwarfReg
= MRI
->getDwarfRegNum(Reg
, true);
354 MMI
.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg
,
356 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
357 .addCFIIndex(CFIIndex
);
361 /// usesTheStack - This function checks if any of the users of EFLAGS
362 /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
363 /// to use the stack, and if we don't adjust the stack we clobber the first
365 /// See X86InstrInfo::copyPhysReg.
366 static bool usesTheStack(const MachineFunction
&MF
) {
367 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
369 for (MachineRegisterInfo::reg_instr_iterator
370 ri
= MRI
.reg_instr_begin(X86::EFLAGS
), re
= MRI
.reg_instr_end();
378 void X86FrameLowering::getStackProbeFunction(const X86Subtarget
&STI
,
380 const char *&Symbol
) {
381 CallOp
= STI
.is64Bit() ? X86::W64ALLOCA
: X86::CALLpcrel32
;
384 if (STI
.isTargetCygMing()) {
385 Symbol
= "___chkstk_ms";
389 } else if (STI
.isTargetCygMing())
395 /// emitPrologue - Push callee-saved registers onto the stack, which
396 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
397 /// space for local variables. Also emit labels used by the exception handler to
398 /// generate the exception handling frames.
401 Here's a gist of what gets emitted:
403 ; Establish frame pointer, if needed
406 .cfi_def_cfa_offset 16
407 .cfi_offset %rbp, -16
410 .cfi_def_cfa_register %rbp
412 ; Spill general-purpose registers
413 [for all callee-saved GPRs]
416 .cfi_def_cfa_offset (offset from RETADDR)
419 ; If the required stack alignment > default stack alignment
420 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
421 ; of unknown size in the stack frame.
422 [if stack needs re-alignment]
425 ; Allocate space for locals
426 [if target is Windows and allocated space > 4096 bytes]
427 ; Windows needs special care for allocations larger
430 call ___chkstk_ms/___chkstk
436 .seh_stackalloc (size of XMM spill slots)
437 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
442 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
443 ; they may get spilled on any platform, if the current function
444 ; calls @llvm.eh.unwind.init
446 [for all callee-saved XMM registers]
447 movaps %<xmm reg>, -MMM(%rbp)
448 [for all callee-saved XMM registers]
449 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
450 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
452 [for all callee-saved XMM registers]
453 movaps %<xmm reg>, KKK(%rsp)
454 [for all callee-saved XMM registers]
455 .seh_savexmm %<xmm reg>, KKK
459 [if needs base pointer]
461 [if needs to restore base pointer]
466 [for all callee-saved registers]
467 .cfi_offset %<reg>, (offset from %rbp)
469 .cfi_def_cfa_offset (offset from RETADDR)
470 [for all callee-saved registers]
471 .cfi_offset %<reg>, (offset from %rsp)
474 - .seh directives are emitted only for Windows 64 ABI
475 - .cfi directives are emitted for all other ABIs
476 - for 32-bit code, substitute %e?? registers for %r??
479 void X86FrameLowering::emitPrologue(MachineFunction
&MF
) const {
480 MachineBasicBlock
&MBB
= MF
.front(); // Prologue goes in entry BB.
481 MachineBasicBlock::iterator MBBI
= MBB
.begin();
482 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
483 const Function
*Fn
= MF
.getFunction();
484 const X86RegisterInfo
*RegInfo
=
485 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
486 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
487 MachineModuleInfo
&MMI
= MF
.getMMI();
488 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
489 uint64_t MaxAlign
= MFI
->getMaxAlignment(); // Desired stack alignment.
490 uint64_t StackSize
= MFI
->getStackSize(); // Number of bytes to allocate.
491 bool HasFP
= hasFP(MF
);
492 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
493 bool Is64Bit
= STI
.is64Bit();
494 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
495 const bool Uses64BitFramePtr
= STI
.isTarget64BitLP64() || STI
.isTargetNaCl64();
496 bool IsWin64
= STI
.isTargetWin64();
497 // Not necessarily synonymous with IsWin64.
498 bool IsWinEH
= MF
.getTarget().getMCAsmInfo()->usesWindowsCFI();
499 bool NeedsWinEH
= IsWinEH
&& Fn
->needsUnwindTableEntry();
501 !IsWinEH
&& (MMI
.hasDebugInfo() || Fn
->needsUnwindTableEntry());
502 bool UseLEA
= STI
.useLeaForSP();
503 unsigned StackAlign
= getStackAlignment();
504 unsigned SlotSize
= RegInfo
->getSlotSize();
505 unsigned FramePtr
= RegInfo
->getFrameRegister(MF
);
506 const unsigned MachineFramePtr
= STI
.isTarget64BitILP32() ?
507 getX86SubSuperRegister(FramePtr
, MVT::i64
, false) : FramePtr
;
508 unsigned StackPtr
= RegInfo
->getStackRegister();
509 unsigned BasePtr
= RegInfo
->getBaseRegister();
512 // If we're forcing a stack realignment we can't rely on just the frame
513 // info, we need to know the ABI stack alignment as well in case we
514 // have a call out. Otherwise just make sure we have some alignment - we'll
515 // go with the minimum SlotSize.
516 if (ForceStackAlign
) {
518 MaxAlign
= (StackAlign
> MaxAlign
) ? StackAlign
: MaxAlign
;
519 else if (MaxAlign
< SlotSize
)
523 // Add RETADDR move area to callee saved frame size.
524 int TailCallReturnAddrDelta
= X86FI
->getTCReturnAddrDelta();
525 if (TailCallReturnAddrDelta
< 0)
526 X86FI
->setCalleeSavedFrameSize(
527 X86FI
->getCalleeSavedFrameSize() - TailCallReturnAddrDelta
);
529 bool UseStackProbe
= (STI
.isOSWindows() && !STI
.isTargetMachO());
531 // The default stack probe size is 4096 if the function has no stackprobesize
533 unsigned StackProbeSize
= 4096;
534 if (Fn
->hasFnAttribute("stack-probe-size"))
535 Fn
->getFnAttribute("stack-probe-size")
537 .getAsInteger(0, StackProbeSize
);
539 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
540 // function, and use up to 128 bytes of stack space, don't have a frame
541 // pointer, calls, or dynamic alloca then we do not need to adjust the
542 // stack pointer (we fit in the Red Zone). We also check that we don't
543 // push and pop from the stack.
544 if (Is64Bit
&& !Fn
->getAttributes().hasAttribute(AttributeSet::FunctionIndex
,
545 Attribute::NoRedZone
) &&
546 !RegInfo
->needsStackRealignment(MF
) &&
547 !MFI
->hasVarSizedObjects() && // No dynamic alloca.
548 !MFI
->adjustsStack() && // No calls.
549 !IsWin64
&& // Win64 has no Red Zone
550 !usesTheStack(MF
) && // Don't push and pop.
551 !MF
.shouldSplitStack()) { // Regular stack
552 uint64_t MinSize
= X86FI
->getCalleeSavedFrameSize();
553 if (HasFP
) MinSize
+= SlotSize
;
554 StackSize
= std::max(MinSize
, StackSize
> 128 ? StackSize
- 128 : 0);
555 MFI
->setStackSize(StackSize
);
558 // Insert stack pointer adjustment for later moving of return addr. Only
559 // applies to tail call optimized functions where the callee argument stack
560 // size is bigger than the callers.
561 if (TailCallReturnAddrDelta
< 0) {
563 BuildMI(MBB
, MBBI
, DL
,
564 TII
.get(getSUBriOpcode(Uses64BitFramePtr
, -TailCallReturnAddrDelta
)),
567 .addImm(-TailCallReturnAddrDelta
)
568 .setMIFlag(MachineInstr::FrameSetup
);
569 MI
->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
572 // Mapping for machine moves:
574 // DST: VirtualFP AND
575 // SRC: VirtualFP => DW_CFA_def_cfa_offset
576 // ELSE => DW_CFA_def_cfa
578 // SRC: VirtualFP AND
579 // DST: Register => DW_CFA_def_cfa_register
582 // OFFSET < 0 => DW_CFA_offset_extended_sf
583 // REG < 64 => DW_CFA_offset + Reg
584 // ELSE => DW_CFA_offset_extended
586 uint64_t NumBytes
= 0;
587 int stackGrowth
= -SlotSize
;
590 // Calculate required stack adjustment.
591 uint64_t FrameSize
= StackSize
- SlotSize
;
592 // If required, include space for extra hidden slot for stashing base pointer.
593 if (X86FI
->getRestoreBasePointer())
594 FrameSize
+= SlotSize
;
595 if (RegInfo
->needsStackRealignment(MF
)) {
596 // Callee-saved registers are pushed on stack before the stack
598 FrameSize
-= X86FI
->getCalleeSavedFrameSize();
599 NumBytes
= (FrameSize
+ MaxAlign
- 1) / MaxAlign
* MaxAlign
;
601 NumBytes
= FrameSize
- X86FI
->getCalleeSavedFrameSize();
604 // Get the offset of the stack slot for the EBP register, which is
605 // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
606 // Update the frame offset adjustment.
607 MFI
->setOffsetAdjustment(-NumBytes
);
609 // Save EBP/RBP into the appropriate stack slot.
610 BuildMI(MBB
, MBBI
, DL
, TII
.get(Is64Bit
? X86::PUSH64r
: X86::PUSH32r
))
611 .addReg(MachineFramePtr
, RegState::Kill
)
612 .setMIFlag(MachineInstr::FrameSetup
);
615 // Mark the place where EBP/RBP was saved.
616 // Define the current CFA rule to use the provided offset.
618 unsigned CFIIndex
= MMI
.addFrameInst(
619 MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth
));
620 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
621 .addCFIIndex(CFIIndex
);
623 // Change the rule for the FramePtr to be an "offset" rule.
624 unsigned DwarfFramePtr
= RegInfo
->getDwarfRegNum(MachineFramePtr
, true);
625 CFIIndex
= MMI
.addFrameInst(
626 MCCFIInstruction::createOffset(nullptr,
627 DwarfFramePtr
, 2 * stackGrowth
));
628 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
629 .addCFIIndex(CFIIndex
);
633 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_PushReg
))
635 .setMIFlag(MachineInstr::FrameSetup
);
638 // Update EBP with the new base value.
639 BuildMI(MBB
, MBBI
, DL
,
640 TII
.get(Uses64BitFramePtr
? X86::MOV64rr
: X86::MOV32rr
), FramePtr
)
642 .setMIFlag(MachineInstr::FrameSetup
);
645 // Mark effective beginning of when frame pointer becomes valid.
646 // Define the current CFA to use the EBP/RBP register.
647 unsigned DwarfFramePtr
= RegInfo
->getDwarfRegNum(MachineFramePtr
, true);
648 unsigned CFIIndex
= MMI
.addFrameInst(
649 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr
));
650 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
651 .addCFIIndex(CFIIndex
);
654 // Mark the FramePtr as live-in in every block.
655 for (MachineFunction::iterator I
= MF
.begin(), E
= MF
.end(); I
!= E
; ++I
)
656 I
->addLiveIn(MachineFramePtr
);
658 NumBytes
= StackSize
- X86FI
->getCalleeSavedFrameSize();
661 // Skip the callee-saved push instructions.
662 bool PushedRegs
= false;
663 int StackOffset
= 2 * stackGrowth
;
665 while (MBBI
!= MBB
.end() &&
666 (MBBI
->getOpcode() == X86::PUSH32r
||
667 MBBI
->getOpcode() == X86::PUSH64r
)) {
669 unsigned Reg
= MBBI
->getOperand(0).getReg();
672 if (!HasFP
&& NeedsDwarfCFI
) {
673 // Mark callee-saved push instruction.
674 // Define the current CFA rule to use the provided offset.
676 unsigned CFIIndex
= MMI
.addFrameInst(
677 MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset
));
678 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
679 .addCFIIndex(CFIIndex
);
680 StackOffset
+= stackGrowth
;
684 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_PushReg
)).addImm(Reg
).setMIFlag(
685 MachineInstr::FrameSetup
);
689 // Realign stack after we pushed callee-saved registers (so that we'll be
690 // able to calculate their offsets from the frame pointer).
691 if (RegInfo
->needsStackRealignment(MF
)) {
692 assert(HasFP
&& "There should be a frame pointer if stack is realigned.");
693 uint64_t Val
= -MaxAlign
;
695 BuildMI(MBB
, MBBI
, DL
,
696 TII
.get(getANDriOpcode(Uses64BitFramePtr
, Val
)), StackPtr
)
699 .setMIFlag(MachineInstr::FrameSetup
);
701 // The EFLAGS implicit def is dead.
702 MI
->getOperand(3).setIsDead();
705 // If there is an SUB32ri of ESP immediately before this instruction, merge
706 // the two. This can be the case when tail call elimination is enabled and
707 // the callee has more arguments than the caller.
708 NumBytes
-= mergeSPUpdates(MBB
, MBBI
, StackPtr
, true);
710 // If there is an ADD32ri or SUB32ri of ESP immediately after this
711 // instruction, merge the two instructions.
712 mergeSPUpdatesDown(MBB
, MBBI
, StackPtr
, &NumBytes
);
714 // Adjust stack pointer: ESP -= numbytes.
716 // Windows and cygwin/mingw require a prologue helper routine when allocating
717 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
718 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
719 // stack and adjust the stack pointer in one go. The 64-bit version of
720 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
721 // responsible for adjusting the stack pointer. Touching the stack at 4K
722 // increments is necessary to ensure that the guard pages used by the OS
723 // virtual memory manager are allocated in correct sequence.
724 if (NumBytes
>= StackProbeSize
&& UseStackProbe
) {
725 const char *StackProbeSymbol
;
728 getStackProbeFunction(STI
, CallOp
, StackProbeSymbol
);
730 // Check whether EAX is livein for this function.
731 bool isEAXAlive
= isEAXLiveIn(MF
);
734 // Sanity check that EAX is not livein for this function.
735 // It should not be, so throw an assert.
736 assert(!Is64Bit
&& "EAX is livein in x64 case!");
739 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::PUSH32r
))
740 .addReg(X86::EAX
, RegState::Kill
)
741 .setMIFlag(MachineInstr::FrameSetup
);
745 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
746 // Function prologue is responsible for adjusting the stack pointer.
747 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::MOV64ri
), X86::RAX
)
749 .setMIFlag(MachineInstr::FrameSetup
);
751 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
752 // We'll also use 4 already allocated bytes for EAX.
753 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::MOV32ri
), X86::EAX
)
754 .addImm(isEAXAlive
? NumBytes
- 4 : NumBytes
)
755 .setMIFlag(MachineInstr::FrameSetup
);
758 BuildMI(MBB
, MBBI
, DL
,
760 .addExternalSymbol(StackProbeSymbol
)
761 .addReg(StackPtr
, RegState::Define
| RegState::Implicit
)
762 .addReg(X86::EFLAGS
, RegState::Define
| RegState::Implicit
)
763 .setMIFlag(MachineInstr::FrameSetup
);
766 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
767 // themselves. They also do not clobber %rax so we can reuse it when
769 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SUB64rr
), StackPtr
)
772 .setMIFlag(MachineInstr::FrameSetup
);
776 MachineInstr
*MI
= addRegOffset(BuildMI(MF
, DL
, TII
.get(X86::MOV32rm
),
778 StackPtr
, false, NumBytes
- 4);
779 MI
->setFlag(MachineInstr::FrameSetup
);
780 MBB
.insert(MBBI
, MI
);
782 } else if (NumBytes
) {
783 emitSPUpdate(MBB
, MBBI
, StackPtr
, -(int64_t)NumBytes
, Is64Bit
, Uses64BitFramePtr
,
784 UseLEA
, TII
, *RegInfo
);
787 int SEHFrameOffset
= 0;
790 // We need to set frame base offset low enough such that all saved
791 // register offsets would be positive relative to it, but we can't
792 // just use NumBytes, because .seh_setframe offset must be <=240.
793 // So we pretend to have only allocated enough space to spill the
794 // non-volatile registers.
795 // We don't care about the rest of stack allocation, because unwinder
796 // will restore SP to (BP - SEHFrameOffset)
797 for (const CalleeSavedInfo
&Info
: MFI
->getCalleeSavedInfo()) {
798 int offset
= MFI
->getObjectOffset(Info
.getFrameIdx());
799 SEHFrameOffset
= std::max(SEHFrameOffset
, std::abs(offset
));
801 SEHFrameOffset
+= SEHFrameOffset
% 16; // ensure alignmant
803 // This only needs to account for XMM spill slots, GPR slots
804 // are covered by the .seh_pushreg's emitted above.
805 unsigned Size
= SEHFrameOffset
- X86FI
->getCalleeSavedFrameSize();
807 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_StackAlloc
))
809 .setMIFlag(MachineInstr::FrameSetup
);
812 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_SetFrame
))
814 .addImm(SEHFrameOffset
)
815 .setMIFlag(MachineInstr::FrameSetup
);
817 // SP will be the base register for restoring XMMs
819 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_StackAlloc
))
821 .setMIFlag(MachineInstr::FrameSetup
);
826 // Skip the rest of register spilling code
827 while (MBBI
!= MBB
.end() && MBBI
->getFlag(MachineInstr::FrameSetup
))
830 // Emit SEH info for non-GPRs
832 for (const CalleeSavedInfo
&Info
: MFI
->getCalleeSavedInfo()) {
833 unsigned Reg
= Info
.getReg();
834 if (X86::GR64RegClass
.contains(Reg
) || X86::GR32RegClass
.contains(Reg
))
836 assert(X86::FR64RegClass
.contains(Reg
) && "Unexpected register class");
838 int Offset
= getFrameIndexOffset(MF
, Info
.getFrameIdx());
839 Offset
+= SEHFrameOffset
;
841 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_SaveXMM
))
844 .setMIFlag(MachineInstr::FrameSetup
);
847 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_EndPrologue
))
848 .setMIFlag(MachineInstr::FrameSetup
);
851 // If we need a base pointer, set it up here. It's whatever the value
852 // of the stack pointer is at this point. Any variable size objects
853 // will be allocated after this, so we can still use the base pointer
854 // to reference locals.
855 if (RegInfo
->hasBasePointer(MF
)) {
856 // Update the base pointer with the current stack pointer.
857 unsigned Opc
= Uses64BitFramePtr
? X86::MOV64rr
: X86::MOV32rr
;
858 BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
), BasePtr
)
860 .setMIFlag(MachineInstr::FrameSetup
);
861 if (X86FI
->getRestoreBasePointer()) {
862 // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain.
863 unsigned Opm
= Uses64BitFramePtr
? X86::MOV64mr
: X86::MOV32mr
;
864 addRegOffset(BuildMI(MBB
, MBBI
, DL
, TII
.get(Opm
)),
865 FramePtr
, true, X86FI
->getRestoreBasePointerOffset())
867 .setMIFlag(MachineInstr::FrameSetup
);
871 if (((!HasFP
&& NumBytes
) || PushedRegs
) && NeedsDwarfCFI
) {
872 // Mark end of stack pointer adjustment.
873 if (!HasFP
&& NumBytes
) {
874 // Define the current CFA rule to use the provided offset.
876 unsigned CFIIndex
= MMI
.addFrameInst(
877 MCCFIInstruction::createDefCfaOffset(nullptr,
878 -StackSize
+ stackGrowth
));
880 BuildMI(MBB
, MBBI
, DL
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
881 .addCFIIndex(CFIIndex
);
884 // Emit DWARF info specifying the offsets of the callee-saved registers.
886 emitCalleeSavedFrameMoves(MBB
, MBBI
, DL
);
890 void X86FrameLowering::emitEpilogue(MachineFunction
&MF
,
891 MachineBasicBlock
&MBB
) const {
892 const MachineFrameInfo
*MFI
= MF
.getFrameInfo();
893 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
894 const X86RegisterInfo
*RegInfo
=
895 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
896 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
897 MachineBasicBlock::iterator MBBI
= MBB
.getLastNonDebugInstr();
898 assert(MBBI
!= MBB
.end() && "Returning block has no instructions");
899 unsigned RetOpcode
= MBBI
->getOpcode();
900 DebugLoc DL
= MBBI
->getDebugLoc();
901 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
902 bool Is64Bit
= STI
.is64Bit();
903 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
904 const bool Uses64BitFramePtr
= STI
.isTarget64BitLP64() || STI
.isTargetNaCl64();
905 const bool Is64BitILP32
= STI
.isTarget64BitILP32();
906 bool UseLEA
= STI
.useLeaForSP();
907 unsigned StackAlign
= getStackAlignment();
908 unsigned SlotSize
= RegInfo
->getSlotSize();
909 unsigned FramePtr
= RegInfo
->getFrameRegister(MF
);
910 unsigned MachineFramePtr
= Is64BitILP32
?
911 getX86SubSuperRegister(FramePtr
, MVT::i64
, false) : FramePtr
;
912 unsigned StackPtr
= RegInfo
->getStackRegister();
914 bool IsWinEH
= MF
.getTarget().getMCAsmInfo()->usesWindowsCFI();
915 bool NeedsWinEH
= IsWinEH
&& MF
.getFunction()->needsUnwindTableEntry();
919 llvm_unreachable("Can only insert epilog into returning blocks");
924 case X86::TCRETURNdi
:
925 case X86::TCRETURNri
:
926 case X86::TCRETURNmi
:
927 case X86::TCRETURNdi64
:
928 case X86::TCRETURNri64
:
929 case X86::TCRETURNmi64
:
931 case X86::EH_RETURN64
:
932 break; // These are ok
935 // Get the number of bytes to allocate from the FrameInfo.
936 uint64_t StackSize
= MFI
->getStackSize();
937 uint64_t MaxAlign
= MFI
->getMaxAlignment();
938 unsigned CSSize
= X86FI
->getCalleeSavedFrameSize();
939 uint64_t NumBytes
= 0;
941 // If we're forcing a stack realignment we can't rely on just the frame
942 // info, we need to know the ABI stack alignment as well in case we
943 // have a call out. Otherwise just make sure we have some alignment - we'll
944 // go with the minimum.
945 if (ForceStackAlign
) {
947 MaxAlign
= (StackAlign
> MaxAlign
) ? StackAlign
: MaxAlign
;
949 MaxAlign
= MaxAlign
? MaxAlign
: 4;
953 // Calculate required stack adjustment.
954 uint64_t FrameSize
= StackSize
- SlotSize
;
955 if (RegInfo
->needsStackRealignment(MF
)) {
956 // Callee-saved registers were pushed on stack before the stack
959 NumBytes
= (FrameSize
+ MaxAlign
- 1) / MaxAlign
* MaxAlign
;
961 NumBytes
= FrameSize
- CSSize
;
965 BuildMI(MBB
, MBBI
, DL
,
966 TII
.get(Is64Bit
? X86::POP64r
: X86::POP32r
), MachineFramePtr
);
968 NumBytes
= StackSize
- CSSize
;
971 // Skip the callee-saved pop instructions.
972 while (MBBI
!= MBB
.begin()) {
973 MachineBasicBlock::iterator PI
= std::prev(MBBI
);
974 unsigned Opc
= PI
->getOpcode();
976 if (Opc
!= X86::POP32r
&& Opc
!= X86::POP64r
&& Opc
!= X86::DBG_VALUE
&&
982 MachineBasicBlock::iterator FirstCSPop
= MBBI
;
984 DL
= MBBI
->getDebugLoc();
986 // If there is an ADD32ri or SUB32ri of ESP immediately before this
987 // instruction, merge the two instructions.
988 if (NumBytes
|| MFI
->hasVarSizedObjects())
989 mergeSPUpdatesUp(MBB
, MBBI
, StackPtr
, &NumBytes
);
991 // If dynamic alloca is used, then reset esp to point to the last callee-saved
992 // slot before popping them off! Same applies for the case, when stack was
994 if (RegInfo
->needsStackRealignment(MF
) || MFI
->hasVarSizedObjects()) {
995 if (RegInfo
->needsStackRealignment(MF
))
998 unsigned Opc
= getLEArOpcode(Uses64BitFramePtr
);
999 addRegOffset(BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
), StackPtr
),
1000 FramePtr
, false, -CSSize
);
1003 unsigned Opc
= (Uses64BitFramePtr
? X86::MOV64rr
: X86::MOV32rr
);
1004 BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
), StackPtr
)
1008 } else if (NumBytes
) {
1009 // Adjust stack pointer back: ESP += numbytes.
1010 emitSPUpdate(MBB
, MBBI
, StackPtr
, NumBytes
, Is64Bit
, Uses64BitFramePtr
, UseLEA
,
1015 // Windows unwinder will not invoke function's exception handler if IP is
1016 // either in prologue or in epilogue. This behavior causes a problem when a
1017 // call immediately precedes an epilogue, because the return address points
1018 // into the epilogue. To cope with that, we insert an epilogue marker here,
1019 // then replace it with a 'nop' if it ends up immediately after a CALL in the
1020 // final emitted code.
1022 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::SEH_Epilogue
));
1024 // We're returning from function via eh_return.
1025 if (RetOpcode
== X86::EH_RETURN
|| RetOpcode
== X86::EH_RETURN64
) {
1026 MBBI
= MBB
.getLastNonDebugInstr();
1027 MachineOperand
&DestAddr
= MBBI
->getOperand(0);
1028 assert(DestAddr
.isReg() && "Offset should be in register!");
1029 BuildMI(MBB
, MBBI
, DL
,
1030 TII
.get(Uses64BitFramePtr
? X86::MOV64rr
: X86::MOV32rr
),
1031 StackPtr
).addReg(DestAddr
.getReg());
1032 } else if (RetOpcode
== X86::TCRETURNri
|| RetOpcode
== X86::TCRETURNdi
||
1033 RetOpcode
== X86::TCRETURNmi
||
1034 RetOpcode
== X86::TCRETURNri64
|| RetOpcode
== X86::TCRETURNdi64
||
1035 RetOpcode
== X86::TCRETURNmi64
) {
1036 bool isMem
= RetOpcode
== X86::TCRETURNmi
|| RetOpcode
== X86::TCRETURNmi64
;
1037 // Tail call return: adjust the stack pointer and jump to callee.
1038 MBBI
= MBB
.getLastNonDebugInstr();
1039 MachineOperand
&JumpTarget
= MBBI
->getOperand(0);
1040 MachineOperand
&StackAdjust
= MBBI
->getOperand(isMem
? 5 : 1);
1041 assert(StackAdjust
.isImm() && "Expecting immediate value.");
1043 // Adjust stack pointer.
1044 int StackAdj
= StackAdjust
.getImm();
1045 int MaxTCDelta
= X86FI
->getTCReturnAddrDelta();
1047 assert(MaxTCDelta
<= 0 && "MaxTCDelta should never be positive");
1049 // Incorporate the retaddr area.
1050 Offset
= StackAdj
-MaxTCDelta
;
1051 assert(Offset
>= 0 && "Offset should never be negative");
1054 // Check for possible merge with preceding ADD instruction.
1055 Offset
+= mergeSPUpdates(MBB
, MBBI
, StackPtr
, true);
1056 emitSPUpdate(MBB
, MBBI
, StackPtr
, Offset
, Is64Bit
, Uses64BitFramePtr
,
1057 UseLEA
, TII
, *RegInfo
);
1060 // Jump to label or value in register.
1061 if (RetOpcode
== X86::TCRETURNdi
|| RetOpcode
== X86::TCRETURNdi64
) {
1062 MachineInstrBuilder MIB
=
1063 BuildMI(MBB
, MBBI
, DL
, TII
.get((RetOpcode
== X86::TCRETURNdi
)
1064 ? X86::TAILJMPd
: X86::TAILJMPd64
));
1065 if (JumpTarget
.isGlobal())
1066 MIB
.addGlobalAddress(JumpTarget
.getGlobal(), JumpTarget
.getOffset(),
1067 JumpTarget
.getTargetFlags());
1069 assert(JumpTarget
.isSymbol());
1070 MIB
.addExternalSymbol(JumpTarget
.getSymbolName(),
1071 JumpTarget
.getTargetFlags());
1073 } else if (RetOpcode
== X86::TCRETURNmi
|| RetOpcode
== X86::TCRETURNmi64
) {
1074 MachineInstrBuilder MIB
=
1075 BuildMI(MBB
, MBBI
, DL
, TII
.get((RetOpcode
== X86::TCRETURNmi
)
1076 ? X86::TAILJMPm
: X86::TAILJMPm64
));
1077 for (unsigned i
= 0; i
!= 5; ++i
)
1078 MIB
.addOperand(MBBI
->getOperand(i
));
1079 } else if (RetOpcode
== X86::TCRETURNri64
) {
1080 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::TAILJMPr64
)).
1081 addReg(JumpTarget
.getReg(), RegState::Kill
);
1083 BuildMI(MBB
, MBBI
, DL
, TII
.get(X86::TAILJMPr
)).
1084 addReg(JumpTarget
.getReg(), RegState::Kill
);
1087 MachineInstr
*NewMI
= std::prev(MBBI
);
1088 NewMI
->copyImplicitOps(MF
, MBBI
);
1090 // Delete the pseudo instruction TCRETURN.
1092 } else if ((RetOpcode
== X86::RETQ
|| RetOpcode
== X86::RETL
||
1093 RetOpcode
== X86::RETIQ
|| RetOpcode
== X86::RETIL
) &&
1094 (X86FI
->getTCReturnAddrDelta() < 0)) {
1095 // Add the return addr area delta back since we are not tail calling.
1096 int delta
= -1*X86FI
->getTCReturnAddrDelta();
1097 MBBI
= MBB
.getLastNonDebugInstr();
1099 // Check for possible merge with preceding ADD instruction.
1100 delta
+= mergeSPUpdates(MBB
, MBBI
, StackPtr
, true);
1101 emitSPUpdate(MBB
, MBBI
, StackPtr
, delta
, Is64Bit
, Uses64BitFramePtr
, UseLEA
, TII
,
1106 int X86FrameLowering::getFrameIndexOffset(const MachineFunction
&MF
,
1108 const X86RegisterInfo
*RegInfo
=
1109 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1110 const MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1111 int Offset
= MFI
->getObjectOffset(FI
) - getOffsetOfLocalArea();
1112 uint64_t StackSize
= MFI
->getStackSize();
1114 if (RegInfo
->hasBasePointer(MF
)) {
1115 assert (hasFP(MF
) && "VLAs and dynamic stack realign, but no FP?!");
1117 // Skip the saved EBP.
1118 return Offset
+ RegInfo
->getSlotSize();
1120 assert((-(Offset
+ StackSize
)) % MFI
->getObjectAlignment(FI
) == 0);
1121 return Offset
+ StackSize
;
1123 } else if (RegInfo
->needsStackRealignment(MF
)) {
1125 // Skip the saved EBP.
1126 return Offset
+ RegInfo
->getSlotSize();
1128 assert((-(Offset
+ StackSize
)) % MFI
->getObjectAlignment(FI
) == 0);
1129 return Offset
+ StackSize
;
1131 // FIXME: Support tail calls
1134 return Offset
+ StackSize
;
1136 // Skip the saved EBP.
1137 Offset
+= RegInfo
->getSlotSize();
1139 // Skip the RETADDR move area
1140 const X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
1141 int TailCallReturnAddrDelta
= X86FI
->getTCReturnAddrDelta();
1142 if (TailCallReturnAddrDelta
< 0)
1143 Offset
-= TailCallReturnAddrDelta
;
1149 int X86FrameLowering::getFrameIndexReference(const MachineFunction
&MF
, int FI
,
1150 unsigned &FrameReg
) const {
1151 const X86RegisterInfo
*RegInfo
=
1152 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1153 // We can't calculate offset from frame pointer if the stack is realigned,
1154 // so enforce usage of stack/base pointer. The base pointer is used when we
1155 // have dynamic allocas in addition to dynamic realignment.
1156 if (RegInfo
->hasBasePointer(MF
))
1157 FrameReg
= RegInfo
->getBaseRegister();
1158 else if (RegInfo
->needsStackRealignment(MF
))
1159 FrameReg
= RegInfo
->getStackRegister();
1161 FrameReg
= RegInfo
->getFrameRegister(MF
);
1162 return getFrameIndexOffset(MF
, FI
);
1165 // Simplified from getFrameIndexOffset keeping only StackPointer cases
1166 int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction
&MF
, int FI
) const {
1167 const MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1168 // Does not include any dynamic realign.
1169 const uint64_t StackSize
= MFI
->getStackSize();
1172 const X86RegisterInfo
*RegInfo
=
1173 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1174 // Note: LLVM arranges the stack as:
1175 // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
1176 // > "Stack Slots" (<--SP)
1177 // We can always address StackSlots from RSP. We can usually (unless
1178 // needsStackRealignment) address CSRs from RSP, but sometimes need to
1179 // address them from RBP. FixedObjects can be placed anywhere in the stack
1180 // frame depending on their specific requirements (i.e. we can actually
1181 // refer to arguments to the function which are stored in the *callers*
1182 // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
1183 // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
1185 assert(!RegInfo
->hasBasePointer(MF
) && "we don't handle this case");
1187 // We don't handle tail calls, and shouldn't be seeing them
1189 int TailCallReturnAddrDelta
=
1190 MF
.getInfo
<X86MachineFunctionInfo
>()->getTCReturnAddrDelta();
1191 assert(!(TailCallReturnAddrDelta
< 0) && "we don't handle this case!");
1195 // This is how the math works out:
1197 // %rsp grows (i.e. gets lower) left to right. Each box below is
1198 // one word (eight bytes). Obj0 is the stack slot we're trying to
1201 // ----------------------------------
1202 // | BP | Obj0 | Obj1 | ... | ObjN |
1203 // ----------------------------------
1207 // A is the incoming stack pointer.
1208 // (B - A) is the local area offset (-8 for x86-64) [1]
1209 // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
1211 // |(E - B)| is the StackSize (absolute value, positive). For a
1212 // stack that grown down, this works out to be (B - E). [3]
1214 // E is also the value of %rsp after stack has been set up, and we
1215 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
1216 // (C - E) == (C - A) - (B - A) + (B - E)
1217 // { Using [1], [2] and [3] above }
1218 // == getObjectOffset - LocalAreaOffset + StackSize
1221 // Get the Offset from the StackPointer
1222 int Offset
= MFI
->getObjectOffset(FI
) - getOffsetOfLocalArea();
1224 return Offset
+ StackSize
;
1226 // Simplified from getFrameIndexReference keeping only StackPointer cases
1227 int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction
&MF
, int FI
,
1228 unsigned &FrameReg
) const {
1229 const X86RegisterInfo
*RegInfo
=
1230 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1232 assert(!RegInfo
->hasBasePointer(MF
) && "we don't handle this case");
1234 FrameReg
= RegInfo
->getStackRegister();
1235 return getFrameIndexOffsetFromSP(MF
, FI
);
1238 bool X86FrameLowering::assignCalleeSavedSpillSlots(
1239 MachineFunction
&MF
, const TargetRegisterInfo
*TRI
,
1240 std::vector
<CalleeSavedInfo
> &CSI
) const {
1241 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1242 const X86RegisterInfo
*RegInfo
=
1243 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1244 unsigned SlotSize
= RegInfo
->getSlotSize();
1245 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
1247 unsigned CalleeSavedFrameSize
= 0;
1248 int SpillSlotOffset
= getOffsetOfLocalArea() + X86FI
->getTCReturnAddrDelta();
1251 // emitPrologue always spills frame register the first thing.
1252 SpillSlotOffset
-= SlotSize
;
1253 MFI
->CreateFixedSpillStackObject(SlotSize
, SpillSlotOffset
);
1255 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
1256 // the frame register, we can delete it from CSI list and not have to worry
1257 // about avoiding it later.
1258 unsigned FPReg
= RegInfo
->getFrameRegister(MF
);
1259 for (unsigned i
= 0; i
< CSI
.size(); ++i
) {
1260 if (TRI
->regsOverlap(CSI
[i
].getReg(),FPReg
)) {
1261 CSI
.erase(CSI
.begin() + i
);
1267 // Assign slots for GPRs. It increases frame size.
1268 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1269 unsigned Reg
= CSI
[i
- 1].getReg();
1271 if (!X86::GR64RegClass
.contains(Reg
) && !X86::GR32RegClass
.contains(Reg
))
1274 SpillSlotOffset
-= SlotSize
;
1275 CalleeSavedFrameSize
+= SlotSize
;
1277 int SlotIndex
= MFI
->CreateFixedSpillStackObject(SlotSize
, SpillSlotOffset
);
1278 CSI
[i
- 1].setFrameIdx(SlotIndex
);
1281 X86FI
->setCalleeSavedFrameSize(CalleeSavedFrameSize
);
1283 // Assign slots for XMMs.
1284 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1285 unsigned Reg
= CSI
[i
- 1].getReg();
1286 if (X86::GR64RegClass
.contains(Reg
) || X86::GR32RegClass
.contains(Reg
))
1289 const TargetRegisterClass
*RC
= RegInfo
->getMinimalPhysRegClass(Reg
);
1291 SpillSlotOffset
-= std::abs(SpillSlotOffset
) % RC
->getAlignment();
1293 SpillSlotOffset
-= RC
->getSize();
1295 MFI
->CreateFixedSpillStackObject(RC
->getSize(), SpillSlotOffset
);
1296 CSI
[i
- 1].setFrameIdx(SlotIndex
);
1297 MFI
->ensureMaxAlignment(RC
->getAlignment());
1303 bool X86FrameLowering::spillCalleeSavedRegisters(
1304 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MI
,
1305 const std::vector
<CalleeSavedInfo
> &CSI
,
1306 const TargetRegisterInfo
*TRI
) const {
1307 DebugLoc DL
= MBB
.findDebugLoc(MI
);
1309 MachineFunction
&MF
= *MBB
.getParent();
1310 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1311 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
1313 // Push GPRs. It increases frame size.
1314 unsigned Opc
= STI
.is64Bit() ? X86::PUSH64r
: X86::PUSH32r
;
1315 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1316 unsigned Reg
= CSI
[i
- 1].getReg();
1318 if (!X86::GR64RegClass
.contains(Reg
) && !X86::GR32RegClass
.contains(Reg
))
1320 // Add the callee-saved register as live-in. It's killed at the spill.
1323 BuildMI(MBB
, MI
, DL
, TII
.get(Opc
)).addReg(Reg
, RegState::Kill
)
1324 .setMIFlag(MachineInstr::FrameSetup
);
1327 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
1328 // It can be done by spilling XMMs to stack frame.
1329 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1330 unsigned Reg
= CSI
[i
-1].getReg();
1331 if (X86::GR64RegClass
.contains(Reg
) ||
1332 X86::GR32RegClass
.contains(Reg
))
1334 // Add the callee-saved register as live-in. It's killed at the spill.
1336 const TargetRegisterClass
*RC
= TRI
->getMinimalPhysRegClass(Reg
);
1338 TII
.storeRegToStackSlot(MBB
, MI
, Reg
, true, CSI
[i
- 1].getFrameIdx(), RC
,
1341 MI
->setFlag(MachineInstr::FrameSetup
);
1348 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock
&MBB
,
1349 MachineBasicBlock::iterator MI
,
1350 const std::vector
<CalleeSavedInfo
> &CSI
,
1351 const TargetRegisterInfo
*TRI
) const {
1355 DebugLoc DL
= MBB
.findDebugLoc(MI
);
1357 MachineFunction
&MF
= *MBB
.getParent();
1358 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1359 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
1361 // Reload XMMs from stack frame.
1362 for (unsigned i
= 0, e
= CSI
.size(); i
!= e
; ++i
) {
1363 unsigned Reg
= CSI
[i
].getReg();
1364 if (X86::GR64RegClass
.contains(Reg
) ||
1365 X86::GR32RegClass
.contains(Reg
))
1368 const TargetRegisterClass
*RC
= TRI
->getMinimalPhysRegClass(Reg
);
1369 TII
.loadRegFromStackSlot(MBB
, MI
, Reg
, CSI
[i
].getFrameIdx(), RC
, TRI
);
1373 unsigned Opc
= STI
.is64Bit() ? X86::POP64r
: X86::POP32r
;
1374 for (unsigned i
= 0, e
= CSI
.size(); i
!= e
; ++i
) {
1375 unsigned Reg
= CSI
[i
].getReg();
1376 if (!X86::GR64RegClass
.contains(Reg
) &&
1377 !X86::GR32RegClass
.contains(Reg
))
1380 BuildMI(MBB
, MI
, DL
, TII
.get(Opc
), Reg
);
1386 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction
&MF
,
1387 RegScavenger
*RS
) const {
1388 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1389 const X86RegisterInfo
*RegInfo
=
1390 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo());
1391 unsigned SlotSize
= RegInfo
->getSlotSize();
1393 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
1394 int64_t TailCallReturnAddrDelta
= X86FI
->getTCReturnAddrDelta();
1396 if (TailCallReturnAddrDelta
< 0) {
1397 // create RETURNADDR area
1406 MFI
->CreateFixedObject(-TailCallReturnAddrDelta
,
1407 TailCallReturnAddrDelta
- SlotSize
, true);
1410 // Spill the BasePtr if it's used.
1411 if (RegInfo
->hasBasePointer(MF
))
1412 MF
.getRegInfo().setPhysRegUsed(RegInfo
->getBaseRegister());
1416 HasNestArgument(const MachineFunction
*MF
) {
1417 const Function
*F
= MF
->getFunction();
1418 for (Function::const_arg_iterator I
= F
->arg_begin(), E
= F
->arg_end();
1420 if (I
->hasNestAttr())
1426 /// GetScratchRegister - Get a temp register for performing work in the
1427 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
1428 /// and the properties of the function either one or two registers will be
1429 /// needed. Set primary to true for the first register, false for the second.
1431 GetScratchRegister(bool Is64Bit
, bool IsLP64
, const MachineFunction
&MF
, bool Primary
) {
1432 CallingConv::ID CallingConvention
= MF
.getFunction()->getCallingConv();
1435 if (CallingConvention
== CallingConv::HiPE
) {
1437 return Primary
? X86::R14
: X86::R13
;
1439 return Primary
? X86::EBX
: X86::EDI
;
1444 return Primary
? X86::R11
: X86::R12
;
1446 return Primary
? X86::R11D
: X86::R12D
;
1449 bool IsNested
= HasNestArgument(&MF
);
1451 if (CallingConvention
== CallingConv::X86_FastCall
||
1452 CallingConvention
== CallingConv::Fast
) {
1454 report_fatal_error("Segmented stacks does not support fastcall with "
1455 "nested function.");
1456 return Primary
? X86::EAX
: X86::ECX
;
1459 return Primary
? X86::EDX
: X86::EAX
;
1460 return Primary
? X86::ECX
: X86::EAX
;
// The stack limit in the TCB is set to this many bytes above the actual stack
// limit.  adjustForSegmentedStacks compares the stack pointer itself against
// the limit when the frame is smaller than this.
static const uint64_t kSplitStackAvailable = 256;
1468 X86FrameLowering::adjustForSegmentedStacks(MachineFunction
&MF
) const {
1469 MachineBasicBlock
&prologueMBB
= MF
.front();
1470 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1471 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1473 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
1474 bool Is64Bit
= STI
.is64Bit();
1475 const bool IsLP64
= STI
.isTarget64BitLP64();
1476 unsigned TlsReg
, TlsOffset
;
1479 unsigned ScratchReg
= GetScratchRegister(Is64Bit
, IsLP64
, MF
, true);
1480 assert(!MF
.getRegInfo().isLiveIn(ScratchReg
) &&
1481 "Scratch register is live-in");
1483 if (MF
.getFunction()->isVarArg())
1484 report_fatal_error("Segmented stacks do not support vararg functions.");
1485 if (!STI
.isTargetLinux() && !STI
.isTargetDarwin() && !STI
.isTargetWin32() &&
1486 !STI
.isTargetWin64() && !STI
.isTargetFreeBSD() &&
1487 !STI
.isTargetDragonFly())
1488 report_fatal_error("Segmented stacks not supported on this platform.");
1490 // Eventually StackSize will be calculated by a link-time pass; which will
1491 // also decide whether checking code needs to be injected into this particular
1493 StackSize
= MFI
->getStackSize();
1495 // Do not generate a prologue for functions with a stack of size zero
1499 MachineBasicBlock
*allocMBB
= MF
.CreateMachineBasicBlock();
1500 MachineBasicBlock
*checkMBB
= MF
.CreateMachineBasicBlock();
1501 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
1502 bool IsNested
= false;
1504 // We need to know if the function has a nest argument only in 64 bit mode.
1506 IsNested
= HasNestArgument(&MF
);
1508 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
1509 // allocMBB needs to be last (terminating) instruction.
1511 for (MachineBasicBlock::livein_iterator i
= prologueMBB
.livein_begin(),
1512 e
= prologueMBB
.livein_end(); i
!= e
; i
++) {
1513 allocMBB
->addLiveIn(*i
);
1514 checkMBB
->addLiveIn(*i
);
1518 allocMBB
->addLiveIn(IsLP64
? X86::R10
: X86::R10D
);
1520 MF
.push_front(allocMBB
);
1521 MF
.push_front(checkMBB
);
1523 // When the frame size is less than 256 we just compare the stack
1524 // boundary directly to the value of the stack pointer, per gcc.
1525 bool CompareStackPointer
= StackSize
< kSplitStackAvailable
;
1527 // Read the limit off the current stacklet off the stack_guard location.
1529 if (STI
.isTargetLinux()) {
1531 TlsOffset
= IsLP64
? 0x70 : 0x40;
1532 } else if (STI
.isTargetDarwin()) {
1534 TlsOffset
= 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
1535 } else if (STI
.isTargetWin64()) {
1537 TlsOffset
= 0x28; // pvArbitrary, reserved for application use
1538 } else if (STI
.isTargetFreeBSD()) {
1541 } else if (STI
.isTargetDragonFly()) {
1543 TlsOffset
= 0x20; // use tls_tcb.tcb_segstack
1545 report_fatal_error("Segmented stacks not supported on this platform.");
1548 if (CompareStackPointer
)
1549 ScratchReg
= IsLP64
? X86::RSP
: X86::ESP
;
1551 BuildMI(checkMBB
, DL
, TII
.get(IsLP64
? X86::LEA64r
: X86::LEA64_32r
), ScratchReg
).addReg(X86::RSP
)
1552 .addImm(1).addReg(0).addImm(-StackSize
).addReg(0);
1554 BuildMI(checkMBB
, DL
, TII
.get(IsLP64
? X86::CMP64rm
: X86::CMP32rm
)).addReg(ScratchReg
)
1555 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset
).addReg(TlsReg
);
1557 if (STI
.isTargetLinux()) {
1560 } else if (STI
.isTargetDarwin()) {
1562 TlsOffset
= 0x48 + 90*4;
1563 } else if (STI
.isTargetWin32()) {
1565 TlsOffset
= 0x14; // pvArbitrary, reserved for application use
1566 } else if (STI
.isTargetDragonFly()) {
1568 TlsOffset
= 0x10; // use tls_tcb.tcb_segstack
1569 } else if (STI
.isTargetFreeBSD()) {
1570 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
1572 report_fatal_error("Segmented stacks not supported on this platform.");
1575 if (CompareStackPointer
)
1576 ScratchReg
= X86::ESP
;
1578 BuildMI(checkMBB
, DL
, TII
.get(X86::LEA32r
), ScratchReg
).addReg(X86::ESP
)
1579 .addImm(1).addReg(0).addImm(-StackSize
).addReg(0);
1581 if (STI
.isTargetLinux() || STI
.isTargetWin32() || STI
.isTargetWin64() ||
1582 STI
.isTargetDragonFly()) {
1583 BuildMI(checkMBB
, DL
, TII
.get(X86::CMP32rm
)).addReg(ScratchReg
)
1584 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset
).addReg(TlsReg
);
1585 } else if (STI
.isTargetDarwin()) {
1587 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
1588 unsigned ScratchReg2
;
1590 if (CompareStackPointer
) {
1591 // The primary scratch register is available for holding the TLS offset.
1592 ScratchReg2
= GetScratchRegister(Is64Bit
, IsLP64
, MF
, true);
1593 SaveScratch2
= false;
1595 // Need to use a second register to hold the TLS offset
1596 ScratchReg2
= GetScratchRegister(Is64Bit
, IsLP64
, MF
, false);
1598 // Unfortunately, with fastcc the second scratch register may hold an
1600 SaveScratch2
= MF
.getRegInfo().isLiveIn(ScratchReg2
);
1603 // If Scratch2 is live-in then it needs to be saved.
1604 assert((!MF
.getRegInfo().isLiveIn(ScratchReg2
) || SaveScratch2
) &&
1605 "Scratch register is live-in and not saved");
1608 BuildMI(checkMBB
, DL
, TII
.get(X86::PUSH32r
))
1609 .addReg(ScratchReg2
, RegState::Kill
);
1611 BuildMI(checkMBB
, DL
, TII
.get(X86::MOV32ri
), ScratchReg2
)
1613 BuildMI(checkMBB
, DL
, TII
.get(X86::CMP32rm
))
1615 .addReg(ScratchReg2
).addImm(1).addReg(0)
1620 BuildMI(checkMBB
, DL
, TII
.get(X86::POP32r
), ScratchReg2
);
1624 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
1625 // It jumps to normal execution of the function body.
1626 BuildMI(checkMBB
, DL
, TII
.get(X86::JA_1
)).addMBB(&prologueMBB
);
1628 // On 32 bit we first push the arguments size and then the frame size. On 64
1629 // bit, we pass the stack frame size in r10 and the argument size in r11.
1631 // Functions with nested arguments use R10, so it needs to be saved across
1632 // the call to _morestack
1634 const unsigned RegAX
= IsLP64
? X86::RAX
: X86::EAX
;
1635 const unsigned Reg10
= IsLP64
? X86::R10
: X86::R10D
;
1636 const unsigned Reg11
= IsLP64
? X86::R11
: X86::R11D
;
1637 const unsigned MOVrr
= IsLP64
? X86::MOV64rr
: X86::MOV32rr
;
1638 const unsigned MOVri
= IsLP64
? X86::MOV64ri
: X86::MOV32ri
;
1641 BuildMI(allocMBB
, DL
, TII
.get(MOVrr
), RegAX
).addReg(Reg10
);
1643 BuildMI(allocMBB
, DL
, TII
.get(MOVri
), Reg10
)
1645 BuildMI(allocMBB
, DL
, TII
.get(MOVri
), Reg11
)
1646 .addImm(X86FI
->getArgumentStackSize());
1647 MF
.getRegInfo().setPhysRegUsed(Reg10
);
1648 MF
.getRegInfo().setPhysRegUsed(Reg11
);
1650 BuildMI(allocMBB
, DL
, TII
.get(X86::PUSHi32
))
1651 .addImm(X86FI
->getArgumentStackSize());
1652 BuildMI(allocMBB
, DL
, TII
.get(X86::PUSHi32
))
1656 // __morestack is in libgcc
1657 if (Is64Bit
&& MF
.getTarget().getCodeModel() == CodeModel::Large
) {
1658 // Under the large code model, we cannot assume that __morestack lives
1659 // within 2^31 bytes of the call site, so we cannot use pc-relative
1660 // addressing. We cannot perform the call via a temporary register,
1661 // as the rax register may be used to store the static chain, and all
1662 // other suitable registers may be either callee-save or used for
1663 // parameter passing. We cannot use the stack at this point either
1664 // because __morestack manipulates the stack directly.
1666 // To avoid these issues, perform an indirect call via a read-only memory
1667 // location containing the address.
1669 // This solution is not perfect, as it assumes that the .rodata section
1670 // is laid out within 2^31 bytes of each function body, but this seems
1671 // to be sufficient for JIT.
1672 BuildMI(allocMBB
, DL
, TII
.get(X86::CALL64m
))
1676 .addExternalSymbol("__morestack_addr")
1678 MF
.getMMI().setUsesMorestackAddr(true);
1681 BuildMI(allocMBB
, DL
, TII
.get(X86::CALL64pcrel32
))
1682 .addExternalSymbol("__morestack");
1684 BuildMI(allocMBB
, DL
, TII
.get(X86::CALLpcrel32
))
1685 .addExternalSymbol("__morestack");
1689 BuildMI(allocMBB
, DL
, TII
.get(X86::MORESTACK_RET_RESTORE_R10
));
1691 BuildMI(allocMBB
, DL
, TII
.get(X86::MORESTACK_RET
));
1693 allocMBB
->addSuccessor(&prologueMBB
);
1695 checkMBB
->addSuccessor(allocMBB
);
1696 checkMBB
->addSuccessor(&prologueMBB
);
1703 /// Erlang programs may need a special prologue to handle the stack size they
1704 /// might need at runtime. That is because Erlang/OTP does not implement a C
1705 /// stack but uses a custom implementation of hybrid stack/heap architecture.
1706 /// (for more information see Eric Stenman's Ph.D. thesis:
1707 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
1710 /// temp0 = sp - MaxStack
1711 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1715 /// call inc_stack # doubles the stack space
1716 /// temp0 = sp - MaxStack
1717 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1718 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction
&MF
) const {
1719 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1720 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1721 const unsigned SlotSize
=
1722 static_cast<const X86RegisterInfo
*>(MF
.getSubtarget().getRegisterInfo())
1724 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
1725 const bool Is64Bit
= STI
.is64Bit();
1726 const bool IsLP64
= STI
.isTarget64BitLP64();
1728 // HiPE-specific values
1729 const unsigned HipeLeafWords
= 24;
1730 const unsigned CCRegisteredArgs
= Is64Bit
? 6 : 5;
1731 const unsigned Guaranteed
= HipeLeafWords
* SlotSize
;
1732 unsigned CallerStkArity
= MF
.getFunction()->arg_size() > CCRegisteredArgs
?
1733 MF
.getFunction()->arg_size() - CCRegisteredArgs
: 0;
1734 unsigned MaxStack
= MFI
->getStackSize() + CallerStkArity
*SlotSize
+ SlotSize
;
1736 assert(STI
.isTargetLinux() &&
1737 "HiPE prologue is only supported on Linux operating systems.");
1739 // Compute the largest caller's frame that is needed to fit the callees'
1740 // frames. This 'MaxStack' is computed from:
1742 // a) the fixed frame size, which is the space needed for all spilled temps,
1743 // b) outgoing on-stack parameter areas, and
1744 // c) the minimum stack space this function needs to make available for the
1745 // functions it calls (a tunable ABI property).
1746 if (MFI
->hasCalls()) {
1747 unsigned MoreStackForCalls
= 0;
1749 for (MachineFunction::iterator MBBI
= MF
.begin(), MBBE
= MF
.end();
1750 MBBI
!= MBBE
; ++MBBI
)
1751 for (MachineBasicBlock::iterator MI
= MBBI
->begin(), ME
= MBBI
->end();
1756 // Get callee operand.
1757 const MachineOperand
&MO
= MI
->getOperand(0);
1759 // Only take account of global function calls (no closures etc.).
1763 const Function
*F
= dyn_cast
<Function
>(MO
.getGlobal());
1767 // Do not update 'MaxStack' for primitive and built-in functions
1768 // (encoded with names either starting with "erlang."/"bif_" or not
1769 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
1770 // "_", such as the BIF "suspend_0") as they are executed on another
1772 if (F
->getName().find("erlang.") != StringRef::npos
||
1773 F
->getName().find("bif_") != StringRef::npos
||
1774 F
->getName().find_first_of("._") == StringRef::npos
)
1777 unsigned CalleeStkArity
=
1778 F
->arg_size() > CCRegisteredArgs
? F
->arg_size()-CCRegisteredArgs
: 0;
1779 if (HipeLeafWords
- 1 > CalleeStkArity
)
1780 MoreStackForCalls
= std::max(MoreStackForCalls
,
1781 (HipeLeafWords
- 1 - CalleeStkArity
) * SlotSize
);
1783 MaxStack
+= MoreStackForCalls
;
1786 // If the stack frame needed is larger than the guaranteed then runtime checks
1787 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
1788 if (MaxStack
> Guaranteed
) {
1789 MachineBasicBlock
&prologueMBB
= MF
.front();
1790 MachineBasicBlock
*stackCheckMBB
= MF
.CreateMachineBasicBlock();
1791 MachineBasicBlock
*incStackMBB
= MF
.CreateMachineBasicBlock();
1793 for (MachineBasicBlock::livein_iterator I
= prologueMBB
.livein_begin(),
1794 E
= prologueMBB
.livein_end(); I
!= E
; I
++) {
1795 stackCheckMBB
->addLiveIn(*I
);
1796 incStackMBB
->addLiveIn(*I
);
1799 MF
.push_front(incStackMBB
);
1800 MF
.push_front(stackCheckMBB
);
1802 unsigned ScratchReg
, SPReg
, PReg
, SPLimitOffset
;
1803 unsigned LEAop
, CMPop
, CALLop
;
1807 LEAop
= X86::LEA64r
;
1808 CMPop
= X86::CMP64rm
;
1809 CALLop
= X86::CALL64pcrel32
;
1810 SPLimitOffset
= 0x90;
1814 LEAop
= X86::LEA32r
;
1815 CMPop
= X86::CMP32rm
;
1816 CALLop
= X86::CALLpcrel32
;
1817 SPLimitOffset
= 0x4c;
1820 ScratchReg
= GetScratchRegister(Is64Bit
, IsLP64
, MF
, true);
1821 assert(!MF
.getRegInfo().isLiveIn(ScratchReg
) &&
1822 "HiPE prologue scratch register is live-in");
1824 // Create new MBB for StackCheck:
1825 addRegOffset(BuildMI(stackCheckMBB
, DL
, TII
.get(LEAop
), ScratchReg
),
1826 SPReg
, false, -MaxStack
);
1827 // SPLimitOffset is in a fixed heap location (pointed by BP).
1828 addRegOffset(BuildMI(stackCheckMBB
, DL
, TII
.get(CMPop
))
1829 .addReg(ScratchReg
), PReg
, false, SPLimitOffset
);
1830 BuildMI(stackCheckMBB
, DL
, TII
.get(X86::JAE_1
)).addMBB(&prologueMBB
);
1832 // Create new MBB for IncStack:
1833 BuildMI(incStackMBB
, DL
, TII
.get(CALLop
)).
1834 addExternalSymbol("inc_stack_0");
1835 addRegOffset(BuildMI(incStackMBB
, DL
, TII
.get(LEAop
), ScratchReg
),
1836 SPReg
, false, -MaxStack
);
1837 addRegOffset(BuildMI(incStackMBB
, DL
, TII
.get(CMPop
))
1838 .addReg(ScratchReg
), PReg
, false, SPLimitOffset
);
1839 BuildMI(incStackMBB
, DL
, TII
.get(X86::JLE_1
)).addMBB(incStackMBB
);
1841 stackCheckMBB
->addSuccessor(&prologueMBB
, 99);
1842 stackCheckMBB
->addSuccessor(incStackMBB
, 1);
1843 incStackMBB
->addSuccessor(&prologueMBB
, 99);
1844 incStackMBB
->addSuccessor(incStackMBB
, 1);
1851 bool X86FrameLowering::
1852 convertArgMovsToPushes(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
1853 MachineBasicBlock::iterator I
, uint64_t Amount
) const {
1854 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1855 const X86RegisterInfo
&RegInfo
= *static_cast<const X86RegisterInfo
*>(
1856 MF
.getSubtarget().getRegisterInfo());
1857 unsigned StackPtr
= RegInfo
.getStackRegister();
1859 // Scan the call setup sequence for the pattern we're looking for.
1860 // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
1861 // instructions, that push a sequence of 32-bit values onto the stack, with
1863 std::map
<int64_t, MachineBasicBlock::iterator
> MovMap
;
1865 int Opcode
= I
->getOpcode();
1866 if (Opcode
!= X86::MOV32mi
&& Opcode
!= X86::MOV32mr
)
1869 // We only want movs of the form:
1870 // movl imm/r32, k(%ecx)
1871 // If we run into something else, bail
1872 // Note that AddrBaseReg may, counterintuitively, not be a register...
1873 if (!I
->getOperand(X86::AddrBaseReg
).isReg() ||
1874 (I
->getOperand(X86::AddrBaseReg
).getReg() != StackPtr
) ||
1875 !I
->getOperand(X86::AddrScaleAmt
).isImm() ||
1876 (I
->getOperand(X86::AddrScaleAmt
).getImm() != 1) ||
1877 (I
->getOperand(X86::AddrIndexReg
).getReg() != X86::NoRegister
) ||
1878 (I
->getOperand(X86::AddrSegmentReg
).getReg() != X86::NoRegister
) ||
1879 !I
->getOperand(X86::AddrDisp
).isImm())
1882 int64_t StackDisp
= I
->getOperand(X86::AddrDisp
).getImm();
1884 // We don't want to consider the unaligned case.
1888 // If the same stack slot is being filled twice, something's fishy.
1889 if (!MovMap
.insert(std::pair
<int64_t, MachineInstr
*>(StackDisp
, I
)).second
)
1893 } while (I
!= MBB
.end());
1895 // We now expect the end of the sequence - a call and a stack adjust.
1900 MachineBasicBlock::iterator Call
= I
;
1901 if ((++I
)->getOpcode() != TII
.getCallFrameDestroyOpcode())
1904 // Now, go through the map, and see that we don't have any gaps,
1905 // but only a series of 32-bit MOVs.
1906 // Since std::map provides ordered iteration, the original order
1907 // of the MOVs doesn't matter.
1908 int64_t ExpectedDist
= 0;
1909 for (auto MMI
= MovMap
.begin(), MME
= MovMap
.end(); MMI
!= MME
;
1910 ++MMI
, ExpectedDist
+= 4)
1911 if (MMI
->first
!= ExpectedDist
)
1914 // Ok, everything looks fine. Do the transformation.
1915 DebugLoc DL
= I
->getDebugLoc();
1917 // It's possible the original stack adjustment amount was larger than
1918 // that done by the pushes. If so, we still need a SUB.
1919 Amount
-= ExpectedDist
;
1921 MachineInstr
* Sub
= BuildMI(MBB
, Call
, DL
,
1922 TII
.get(getSUBriOpcode(false, Amount
)), StackPtr
)
1923 .addReg(StackPtr
).addImm(Amount
);
1924 Sub
->getOperand(3).setIsDead();
1927 // Now, iterate through the map in reverse order, and replace the movs
1928 // with pushes. MOVmi/MOVmr don't have any defs, so no need to replace uses.
1929 for (auto MMI
= MovMap
.rbegin(), MME
= MovMap
.rend(); MMI
!= MME
; ++MMI
) {
1930 MachineBasicBlock::iterator MOV
= MMI
->second
;
1931 MachineOperand PushOp
= MOV
->getOperand(X86::AddrNumOperands
);
1933 // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
1934 int PushOpcode
= X86::PUSH32r
;
1935 if (MOV
->getOpcode() == X86::MOV32mi
)
1936 PushOpcode
= getPUSHiOpcode(false, PushOp
);
1938 BuildMI(MBB
, Call
, DL
, TII
.get(PushOpcode
)).addOperand(PushOp
);
1945 void X86FrameLowering::
1946 eliminateCallFramePseudoInstr(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
1947 MachineBasicBlock::iterator I
) const {
1948 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1949 const X86RegisterInfo
&RegInfo
= *static_cast<const X86RegisterInfo
*>(
1950 MF
.getSubtarget().getRegisterInfo());
1951 unsigned StackPtr
= RegInfo
.getStackRegister();
1952 bool reserveCallFrame
= hasReservedCallFrame(MF
);
1953 int Opcode
= I
->getOpcode();
1954 bool isDestroy
= Opcode
== TII
.getCallFrameDestroyOpcode();
1955 const X86Subtarget
&STI
= MF
.getTarget().getSubtarget
<X86Subtarget
>();
1956 bool IsLP64
= STI
.isTarget64BitLP64();
1957 DebugLoc DL
= I
->getDebugLoc();
1958 uint64_t Amount
= !reserveCallFrame
? I
->getOperand(0).getImm() : 0;
1959 uint64_t CalleeAmt
= isDestroy
? I
->getOperand(1).getImm() : 0;
1962 if (!reserveCallFrame
) {
1963 // If the stack pointer can be changed after prologue, turn the
1964 // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
1965 // adjcallstackup instruction into 'add ESP, <amt>'
1969 // We need to keep the stack aligned properly. To do this, we round the
1970 // amount of space needed for the outgoing arguments up to the next
1971 // alignment boundary.
1972 unsigned StackAlign
= MF
.getTarget()
1974 ->getFrameLowering()
1975 ->getStackAlignment();
1976 Amount
= (Amount
+ StackAlign
- 1) / StackAlign
* StackAlign
;
1978 MachineInstr
*New
= nullptr;
1979 if (Opcode
== TII
.getCallFrameSetupOpcode()) {
1980 // Try to convert movs to the stack into pushes.
1981 // We currently only look for a pattern that appears in 32-bit
1982 // calling conventions.
1983 if (!IsLP64
&& convertArgMovsToPushes(MF
, MBB
, I
, Amount
))
1986 New
= BuildMI(MF
, DL
, TII
.get(getSUBriOpcode(IsLP64
, Amount
)),
1991 assert(Opcode
== TII
.getCallFrameDestroyOpcode());
1993 // Factor out the amount the callee already popped.
1994 Amount
-= CalleeAmt
;
1997 unsigned Opc
= getADDriOpcode(IsLP64
, Amount
);
1998 New
= BuildMI(MF
, DL
, TII
.get(Opc
), StackPtr
)
1999 .addReg(StackPtr
).addImm(Amount
);
2004 // The EFLAGS implicit def is dead.
2005 New
->getOperand(3).setIsDead();
2007 // Replace the pseudo instruction with a new instruction.
2014 if (Opcode
== TII
.getCallFrameDestroyOpcode() && CalleeAmt
) {
2015 // If we are performing frame pointer elimination and if the callee pops
2016 // something off the stack pointer, add it back. We do this until we have
2017 // more advanced stack pointer tracking ability.
2018 unsigned Opc
= getSUBriOpcode(IsLP64
, CalleeAmt
);
2019 MachineInstr
*New
= BuildMI(MF
, DL
, TII
.get(Opc
), StackPtr
)
2020 .addReg(StackPtr
).addImm(CalleeAmt
);
2022 // The EFLAGS implicit def is dead.
2023 New
->getOperand(3).setIsDead();
2025 // We are not tracking the stack pointer adjustment by the callee, so make
2026 // sure we restore the stack pointer immediately after the call; there may
2027 // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
2028 MachineBasicBlock::iterator B
= MBB
.begin();
2029 while (I
!= B
&& !std::prev(I
)->isCall())