]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// The x87 hardware tracks liveness of the stack registers, so it is necessary
// to implement exact liveness tracking between basic blocks. The CFG edges are
// partitioned into bundles where the same FP registers must be live in
// identical stack positions. Instructions are inserted at the end of each
// basic block to rearrange the live registers to match the outgoing bundle.
//
// This approach avoids splitting critical edges at the potential cost of more
// live register shuffling instructions when critical edges are present.
//
//===----------------------------------------------------------------------===//
223e47cc LB |
26 | #include "X86.h" |
27 | #include "X86InstrInfo.h" | |
223e47cc | 28 | #include "llvm/ADT/DepthFirstIterator.h" |
970d7e83 | 29 | #include "llvm/ADT/STLExtras.h" |
223e47cc | 30 | #include "llvm/ADT/SmallPtrSet.h" |
1a4d82fc | 31 | #include "llvm/ADT/SmallSet.h" |
223e47cc LB |
32 | #include "llvm/ADT/SmallVector.h" |
33 | #include "llvm/ADT/Statistic.h" | |
223e47cc LB |
34 | #include "llvm/CodeGen/EdgeBundles.h" |
35 | #include "llvm/CodeGen/MachineFunctionPass.h" | |
36 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |
37 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
1a4d82fc | 38 | #include "llvm/CodeGen/LivePhysRegs.h" |
223e47cc | 39 | #include "llvm/CodeGen/Passes.h" |
970d7e83 | 40 | #include "llvm/IR/InlineAsm.h" |
223e47cc LB |
41 | #include "llvm/Support/Debug.h" |
42 | #include "llvm/Support/ErrorHandling.h" | |
43 | #include "llvm/Support/raw_ostream.h" | |
44 | #include "llvm/Target/TargetInstrInfo.h" | |
45 | #include "llvm/Target/TargetMachine.h" | |
1a4d82fc | 46 | #include "llvm/Target/TargetSubtargetInfo.h" |
223e47cc | 47 | #include <algorithm> |
1a4d82fc | 48 | #include <bitset> |
223e47cc LB |
using namespace llvm;

#define DEBUG_TYPE "x86-codegen"

// Pass-wide statistics, printed with -stats.
STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP  , "Number of floating point instructions");
namespace {
  // Pseudo FP register used as scratch space; never allocated by the
  // register allocator (FP0-FP6 are), see RegMap below.
  const unsigned ScratchFPReg = 7;

  /// FPS - The X86 FP stackifier pass. Converts virtual FP0-FP6 register
  /// references into concrete x87 stack (ST(i)) operations.
  struct FPS : public MachineFunctionPass {
    static char ID;
    FPS() : MachineFunctionPass(ID) {
      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
      // This is really only to keep valgrind quiet.
      // The logic in isLive() is too much for it.
      memset(Stack, 0, sizeof(Stack));
      memset(RegMap, 0, sizeof(RegMap));
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addRequired<EdgeBundles>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addPreservedID(MachineDominatorsID);
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    const char *getPassName() const override { return "X86 FP Stackifier"; }

  private:
    const TargetInstrInfo *TII; // Machine instruction info.

    // Two CFG edges are related if they leave the same block, or enter the
    // same block. The transitive closure of an edge under this relation is a
    // LiveBundle. It represents a set of CFG edges where the live FP stack
    // registers must be allocated identically in the x87 stack.
    //
    // A LiveBundle is usually all the edges leaving a block, or all the edges
    // entering a block, but it can contain more edges if critical edges are
    // present.
    //
    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
    // but the exact mapping of FP registers to stack slots is fixed later.
    struct LiveBundle {
      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
      unsigned Mask;

      // Number of pre-assigned live registers in FixStack. This is 0 when the
      // stack order has not yet been fixed.
      unsigned FixCount;

      // Assigned stack order for live-in registers.
      // FixStack[i] == getStackEntry(i) for all i < FixCount.
      unsigned char FixStack[8];

      LiveBundle() : Mask(0), FixCount(0) {}

      // Have the live registers been assigned a stack order yet?
      bool isFixed() const { return !Mask || FixCount; }
    };

    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
    // with no live FP registers.
    SmallVector<LiveBundle, 8> LiveBundles;

    // The edge bundle analysis provides indices into the LiveBundles vector.
    EdgeBundles *Bundles;

    // Return a bitmask of FP registers in block's live-in list.
    static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
      unsigned Mask = 0;
      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
           E = MBB->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        // Only the seven allocatable FP registers participate in the mask.
        if (Reg < X86::FP0 || Reg > X86::FP6)
          continue;
        Mask |= 1 << (Reg - X86::FP0);
      }
      return Mask;
    }

    // Partition all the CFG edges into LiveBundles.
    void bundleCFG(MachineFunction &MF);

    MachineBasicBlock *MBB;     // Current basic block

    // The hardware keeps track of how many FP registers are live, so we have
    // to model that exactly. Usually, each live register corresponds to an
    // FP<n> register, but when dealing with calls, returns, and inline
    // assembly, it is sometimes necessary to have live scratch registers.
    unsigned Stack[8];          // FP<n> Registers in each stack slot...
    unsigned StackTop;          // The current top of the FP stack.

    enum {
      NumFPRegs = 8             // Including scratch pseudo-registers.
    };

    // For each live FP<n> register, point to its Stack[] entry.
    // The first entries correspond to FP0-FP6, the rest are scratch registers
    // used when we need slightly different live registers than what the
    // register allocator thinks.
    unsigned RegMap[NumFPRegs];

    // Set up our stack model to match the incoming registers to MBB.
    void setupBlockStack();

    // Shuffle live registers to match the expectations of successor blocks.
    void finishBlockStack();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Debug-only helper: print the current stack model and verify that the
    // Stack[]/RegMap[] cross-references are consistent.
    void dumpStack() const {
      dbgs() << "Stack contents:";
      for (unsigned i = 0; i != StackTop; ++i) {
        dbgs() << " FP" << Stack[i];
        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
      }
    }
#endif

    /// getSlot - Return the stack slot number a particular register number is
    /// in.
    unsigned getSlot(unsigned RegNo) const {
      assert(RegNo < NumFPRegs && "Regno out of range!");
      return RegMap[RegNo];
    }

    /// isLive - Is RegNo currently live in the stack?
    bool isLive(unsigned RegNo) const {
      unsigned Slot = getSlot(RegNo);
      // RegMap may contain stale entries; a register is live only if its
      // recorded slot is below StackTop and the slot points back at it.
      return Slot < StackTop && Stack[Slot] == RegNo;
    }

    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
    unsigned getStackEntry(unsigned STi) const {
      if (STi >= StackTop)
        report_fatal_error("Access past stack top!");
      return Stack[StackTop-1-STi];
    }

    /// getSTReg - Return the X86::ST(i) register which contains the specified
    /// FP<RegNo> register.
    unsigned getSTReg(unsigned RegNo) const {
      return StackTop - 1 - getSlot(RegNo) + X86::ST0;
    }

    // pushReg - Push the specified FP<n> register onto the stack.
    void pushReg(unsigned Reg) {
      assert(Reg < NumFPRegs && "Register number out of range!");
      if (StackTop >= 8)
        report_fatal_error("Stack overflow!");
      Stack[StackTop] = Reg;
      RegMap[Reg] = StackTop++;
    }

    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }

    // Move FP<RegNo> to ST(0) by emitting an FXCH before instruction I,
    // updating the stack model to match.
    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      if (isAtTop(RegNo)) return;

      unsigned STReg = getSTReg(RegNo);
      unsigned RegOnTop = getStackEntry(0);

      // Swap the slots the regs are in.
      std::swap(RegMap[RegNo], RegMap[RegOnTop]);

      // Swap stack slot contents.
      if (RegMap[RegOnTop] >= StackTop)
        report_fatal_error("Access past stack top!");
      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);

      // Emit an fxch to update the runtime processors version of the state.
      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
      ++NumFXCH;
    }

    // Push a fresh copy of FP<RegNo> (named AsReg in the model) onto the
    // stack by emitting an FLD ST(i) before instruction I.
    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      unsigned STReg = getSTReg(RegNo);
      pushReg(AsReg);   // New register on top of stack

      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
    }

    /// popStackAfter - Pop the current value off of the top of the FP stack
    /// after the specified instruction.
    void popStackAfter(MachineBasicBlock::iterator &I);

    /// freeStackSlotAfter - Free the specified register from the register
    /// stack, so that it is no longer in a register. If the register is
    /// currently at the top of the stack, we just pop the current instruction,
    /// otherwise we store the current top-of-stack into the specified slot,
    /// then pop the top of stack.
    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);

    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
    /// instruction.
    MachineBasicBlock::iterator
    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);

    /// Adjust the live registers to be the set in Mask.
    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);

    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
    /// st(0), FP reg FixStack[1] is st(1) etc.
    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
                         MachineBasicBlock::iterator I);

    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);

    // Handlers for each X86II::FPTypeMask instruction class; each rewrites
    // the instruction at I into its concrete stack form.
    void handleCall(MachineBasicBlock::iterator &I);
    void handleZeroArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
    void handleTwoArgFP(MachineBasicBlock::iterator &I);
    void handleCompareFP(MachineBasicBlock::iterator &I);
    void handleCondMovFP(MachineBasicBlock::iterator &I);
    void handleSpecialFP(MachineBasicBlock::iterator &I);

    // Check if a COPY instruction is using FP registers.
    static bool isFPCopy(MachineInstr *MI) {
      unsigned DstReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();

      return X86::RFP80RegClass.contains(DstReg) ||
        X86::RFP80RegClass.contains(SrcReg);
    }

    void setKillFlags(MachineBasicBlock &MBB) const;
  };
  char FPS::ID = 0;
}
283 | ||
284 | FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } | |
285 | ||
286 | /// getFPReg - Return the X86::FPx register number for the specified operand. | |
287 | /// For example, this returns 3 for X86::FP3. | |
288 | static unsigned getFPReg(const MachineOperand &MO) { | |
289 | assert(MO.isReg() && "Expected an FP register!"); | |
290 | unsigned Reg = MO.getReg(); | |
291 | assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); | |
292 | return Reg - X86::FP0; | |
293 | } | |
294 | ||
/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
bool FPS::runOnMachineFunction(MachineFunction &MF) {
  // We only need to run this pass if there are any FP registers used in this
  // function. If it is all integer, there is nothing for us to do!
  bool FPIsUsed = false;

  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
  for (unsigned i = 0; i <= 6; ++i)
    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
      FPIsUsed = true;
      break;
    }

  // Early exit.
  if (!FPIsUsed) return false;

  Bundles = &getAnalysis<EdgeBundles>();
  TII = MF.getSubtarget().getInstrInfo();

  // Prepare cross-MBB liveness.
  bundleCFG(MF);

  StackTop = 0;

  // Process the function in depth first order so that we process at least one
  // of the predecessors for every reachable block in the function.
  SmallPtrSet<MachineBasicBlock*, 8> Processed;
  MachineBasicBlock *Entry = MF.begin();

  bool Changed = false;
  for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
    Changed |= processBasicBlock(MF, *BB);

  // Process any unreachable blocks in arbitrary order now. These were not
  // visited by the depth-first walk above, so Processed is smaller than MF.
  if (MF.size() != Processed.size())
    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
      if (Processed.insert(BB).second)
        Changed |= processBasicBlock(MF, *BB);

  // Per-function state; drop it so the next function starts clean.
  LiveBundles.clear();

  return Changed;
}
340 | ||
341 | /// bundleCFG - Scan all the basic blocks to determine consistent live-in and | |
342 | /// live-out sets for the FP registers. Consistent means that the set of | |
343 | /// registers live-out from a block is identical to the live-in set of all | |
344 | /// successors. This is not enforced by the normal live-in lists since | |
345 | /// registers may be implicitly defined, or not used by all successors. | |
346 | void FPS::bundleCFG(MachineFunction &MF) { | |
347 | assert(LiveBundles.empty() && "Stale data in LiveBundles"); | |
348 | LiveBundles.resize(Bundles->getNumBundles()); | |
349 | ||
350 | // Gather the actual live-in masks for all MBBs. | |
351 | for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { | |
352 | MachineBasicBlock *MBB = I; | |
353 | const unsigned Mask = calcLiveInMask(MBB); | |
354 | if (!Mask) | |
355 | continue; | |
356 | // Update MBB ingoing bundle mask. | |
357 | LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; | |
358 | } | |
359 | } | |
360 | ||
/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
  bool Changed = false;
  MBB = &BB;

  setKillFlags(BB);
  setupBlockStack();

  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
    MachineInstr *MI = I;
    uint64_t Flags = MI->getDesc().TSFlags;

    unsigned FPInstClass = Flags & X86II::FPTypeMask;
    // Inline asm, FP register copies, implicit FP defs and calls all need
    // the general SpecialFP handling even though their TSFlags say NotFP.
    if (MI->isInlineAsm())
      FPInstClass = X86II::SpecialFP;

    if (MI->isCopy() && isFPCopy(MI))
      FPInstClass = X86II::SpecialFP;

    if (MI->isImplicitDef() &&
        X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
      FPInstClass = X86II::SpecialFP;

    if (MI->isCall())
      FPInstClass = X86II::SpecialFP;

    if (FPInstClass == X86II::NotFP)
      continue;  // Efficiently ignore non-fp insts!

    // Remember the instruction before MI so the debug dump below can find
    // the first instruction inserted by the handler.
    MachineInstr *PrevMI = nullptr;
    if (I != BB.begin())
      PrevMI = std::prev(I);

    ++NumFP;  // Keep track of # of pseudo instrs
    DEBUG(dbgs() << "\nFPInst:\t" << *MI);

    // Get dead variables list now because the MI pointer may be deleted as part
    // of processing!
    SmallVector<unsigned, 8> DeadRegs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDead())
        DeadRegs.push_back(MO.getReg());
    }

    switch (FPInstClass) {
    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
    case X86II::CompareFP:  handleCompareFP(I); break;
    case X86II::CondMovFP:  handleCondMovFP(I); break;
    case X86II::SpecialFP:  handleSpecialFP(I); break;
    default: llvm_unreachable("Unknown FP Type!");
    }

    // Check to see if any of the values defined by this instruction are dead
    // after definition. If so, pop them.
    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
      unsigned Reg = DeadRegs[i];
      // Check if Reg is live on the stack. An inline-asm register operand that
      // is in the clobber list and marked dead might not be live on the stack.
      if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
        freeStackSlotAfter(I, Reg-X86::FP0);
      }
    }

    // Print out all of the instructions expanded to if -debug
    DEBUG(
      MachineBasicBlock::iterator PrevI(PrevMI);
      if (I == PrevI) {
        dbgs() << "Just deleted pseudo instruction\n";
      } else {
        MachineBasicBlock::iterator Start = I;
        // Rewind to first instruction newly inserted.
        while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
        dbgs() << "Inserted instructions:\n\t";
        Start->print(dbgs(), &MF.getTarget());
        while (++Start != std::next(I)) {}
      }
      dumpStack();
    );
    (void)PrevMI;

    Changed = true;
  }

  finishBlockStack();

  return Changed;
}
455 | ||
/// setupBlockStack - Use the live bundles to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");
  StackTop = 0;
  // Get the live-in bundle for MBB.
  const LiveBundle &Bundle =
    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "Block has no FP live-ins.\n");
    return;
  }

  // Depth-first iteration should ensure that we always have an assigned stack.
  assert(Bundle.isFixed() && "Reached block before any predecessors");

  // Push the fixed live-in registers. Iterate from the deepest fixed entry
  // down to st(0) so that pushReg builds the stack in the right order.
  for (unsigned i = Bundle.FixCount; i > 0; --i) {
    MBB->addLiveIn(X86::ST0+i-1);
    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
                 << unsigned(Bundle.FixStack[i-1]) << '\n');
    pushReg(Bundle.FixStack[i-1]);
  }

  // Kill off unwanted live-ins. This can happen with a critical edge.
  // FIXME: We could keep these live registers around as zombies. They may need
  // to be revived at the end of a short block. It might save a few instrs.
  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
  DEBUG(MBB->dump());
}
488 | ||
/// finishBlockStack - Revive live-outs that are implicitly defined out of
/// MBB. Shuffle live registers to match the expected fixed stack of any
/// predecessors, and ensure that all predecessors are expecting the same
/// stack.
void FPS::finishBlockStack() {
  // The RET handling below takes care of return blocks for us.
  if (MBB->succ_empty())
    return;

  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");

  // Get MBB's live-out bundle.
  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
  LiveBundle &Bundle = LiveBundles[BundleIdx];

  // We may need to kill and define some registers to match successors.
  // FIXME: This can probably be combined with the shuffle below.
  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
  adjustLiveRegs(Bundle.Mask, Term);

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "No live-outs.\n");
    return;
  }

  // Has the stack order been fixed yet?
  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
  if (Bundle.isFixed()) {
    // Another block in this bundle was processed first; match its order.
    DEBUG(dbgs() << "Shuffling stack to match.\n");
    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
  } else {
    // Not fixed yet, we get to choose.
    DEBUG(dbgs() << "Fixing stack order now.\n");
    Bundle.FixCount = StackTop;
    for (unsigned i = 0; i < StackTop; ++i)
      Bundle.FixStack[i] = getStackEntry(i);
  }
}
528 | ||
529 | ||
//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//

namespace {
  // One row of a sorted opcode -> opcode mapping table, binary-searchable by
  // 'from' via the heterogeneous comparisons below (see Lookup()).
  struct TableEntry {
    uint16_t from;
    uint16_t to;
    bool operator<(const TableEntry &TE) const { return from < TE.from; }
    friend bool operator<(const TableEntry &TE, unsigned V) {
      return TE.from < V;
    }
    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
                                                const TableEntry &TE) {
      return V < TE.from;
    }
  };
}
548 | ||
549 | #ifndef NDEBUG | |
550 | static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { | |
551 | for (unsigned i = 0; i != NumEntries-1; ++i) | |
552 | if (!(Table[i] < Table[i+1])) return false; | |
553 | return true; | |
554 | } | |
555 | #endif | |
556 | ||
557 | static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { | |
558 | const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); | |
559 | if (I != Table+N && I->from == Opcode) | |
560 | return I->to; | |
561 | return -1; | |
562 | } | |
563 | ||
#ifdef NDEBUG
#define ASSERT_SORTED(TABLE)
#else
// ASSERT_SORTED - Assert (once per table, cached in a function-local static)
// that TABLE is sorted, so binary search in Lookup() is valid.
#define ASSERT_SORTED(TABLE)                                              \
  { static bool TABLE##Checked = false;                                   \
    if (!TABLE##Checked) {                                                \
       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
              "All lookup tables must be sorted for efficient access!");  \
       TABLE##Checked = true;                                             \
    }                                                                     \
  }
#endif
576 | ||
//===----------------------------------------------------------------------===//
// Register File -> Register Stack Mapping Methods
//===----------------------------------------------------------------------===//

// OpcodeTable - Sorted map of register instructions to their stack version.
// The first element is an register file pseudo instruction, the second is the
// concrete X86 instruction which uses the register stack.
//
static const TableEntry OpcodeTable[] = {
  { X86::ABS_Fp32     , X86::ABS_F     },
  { X86::ABS_Fp64     , X86::ABS_F     },
  { X86::ABS_Fp80     , X86::ABS_F     },
  { X86::ADD_Fp32m    , X86::ADD_F32m  },
  { X86::ADD_Fp64m    , X86::ADD_F64m  },
  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
  { X86::CHS_Fp32     , X86::CHS_F     },
  { X86::CHS_Fp64     , X86::CHS_F     },
  { X86::CHS_Fp80     , X86::CHS_F     },
  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
  { X86::CMOVB_Fp64   , X86::CMOVB_F   },
  { X86::CMOVB_Fp80   , X86::CMOVB_F   },
  { X86::CMOVE_Fp32   , X86::CMOVE_F   },
  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
  { X86::COS_Fp32     , X86::COS_F     },
  { X86::COS_Fp64     , X86::COS_F     },
  { X86::COS_Fp80     , X86::COS_F     },
  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
  { X86::DIV_Fp32m    , X86::DIV_F32m  },
  { X86::DIV_Fp64m    , X86::DIV_F64m  },
  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
  { X86::IST_Fp16m32  , X86::IST_F16m  },
  { X86::IST_Fp16m64  , X86::IST_F16m  },
  { X86::IST_Fp16m80  , X86::IST_F16m  },
  { X86::IST_Fp32m32  , X86::IST_F32m  },
  { X86::IST_Fp32m64  , X86::IST_F32m  },
  { X86::IST_Fp32m80  , X86::IST_F32m  },
  { X86::IST_Fp64m32  , X86::IST_FP64m },
  { X86::IST_Fp64m64  , X86::IST_FP64m },
  { X86::IST_Fp64m80  , X86::IST_FP64m },
  { X86::LD_Fp032     , X86::LD_F0     },
  { X86::LD_Fp064     , X86::LD_F0     },
  { X86::LD_Fp080     , X86::LD_F0     },
  { X86::LD_Fp132     , X86::LD_F1     },
  { X86::LD_Fp164     , X86::LD_F1     },
  { X86::LD_Fp180     , X86::LD_F1     },
  { X86::LD_Fp32m     , X86::LD_F32m   },
  { X86::LD_Fp32m64   , X86::LD_F32m   },
  { X86::LD_Fp32m80   , X86::LD_F32m   },
  { X86::LD_Fp64m     , X86::LD_F64m   },
  { X86::LD_Fp64m80   , X86::LD_F64m   },
  { X86::LD_Fp80m     , X86::LD_F80m   },
  { X86::MUL_Fp32m    , X86::MUL_F32m  },
  { X86::MUL_Fp64m    , X86::MUL_F64m  },
  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
  { X86::SIN_Fp32     , X86::SIN_F     },
  { X86::SIN_Fp64     , X86::SIN_F     },
  { X86::SIN_Fp80     , X86::SIN_F     },
  { X86::SQRT_Fp32    , X86::SQRT_F    },
  { X86::SQRT_Fp64    , X86::SQRT_F    },
  { X86::SQRT_Fp80    , X86::SQRT_F    },
  { X86::ST_Fp32m     , X86::ST_F32m   },
  { X86::ST_Fp64m     , X86::ST_F64m   },
  { X86::ST_Fp64m32   , X86::ST_F32m   },
  { X86::ST_Fp80m32   , X86::ST_F32m   },
  { X86::ST_Fp80m64   , X86::ST_F64m   },
  { X86::ST_FpP80m    , X86::ST_FP80m  },
  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
  { X86::SUB_Fp32m    , X86::SUB_F32m  },
  { X86::SUB_Fp64m    , X86::SUB_F64m  },
  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
  { X86::TST_Fp32     , X86::TST_F     },
  { X86::TST_Fp64     , X86::TST_F     },
  { X86::TST_Fp80     , X86::TST_F     },
  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
};
746 | ||
747 | static unsigned getConcreteOpcode(unsigned Opcode) { | |
748 | ASSERT_SORTED(OpcodeTable); | |
749 | int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); | |
750 | assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); | |
751 | return Opc; | |
752 | } | |
753 | ||
754 | //===----------------------------------------------------------------------===// | |
755 | // Helper Methods | |
756 | //===----------------------------------------------------------------------===// | |
757 | ||
// PopTable - Sorted map of instructions to their popping version.  The first
// element is an instruction, the second is the version which pops.  Keyed and
// binary-searched by the first column, so entries must stay sorted.
//
static const TableEntry PopTable[] = {
  // Arithmetic into ST(i) -> popping forms.
  { X86::ADD_FrST0 , X86::ADD_FPrST0 },

  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
  { X86::DIV_FrST0 , X86::DIV_FPrST0 },

  // Integer stores -> store-and-pop forms.
  { X86::IST_F16m  , X86::IST_FP16m  },
  { X86::IST_F32m  , X86::IST_FP32m  },

  { X86::MUL_FrST0 , X86::MUL_FPrST0 },

  // FP stores (memory and register) -> store-and-pop forms.
  { X86::ST_F32m   , X86::ST_FP32m   },
  { X86::ST_F64m   , X86::ST_FP64m   },
  { X86::ST_Frr    , X86::ST_FPrr    },

  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
  { X86::SUB_FrST0 , X86::SUB_FPrST0 },

  // Unordered compares: each non-popping form maps to the form that pops
  // one more value (UCOM_Fr -> pop once, UCOM_FPr -> pop twice).
  { X86::UCOM_FIr  , X86::UCOM_FIPr  },

  { X86::UCOM_FPr  , X86::UCOM_FPPr  },
  { X86::UCOM_Fr   , X86::UCOM_FPr   },
};
784 | ||
/// popStackAfter - Pop the current value off of the top of the FP stack after
/// the specified instruction.  This attempts to be sneaky and combine the pop
/// into the instruction itself if possible.  The iterator is left pointing to
/// the last instruction, be it a new pop instruction inserted, or the old
/// instruction if it was modified in place.
///
void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
  MachineInstr* MI = I;
  DebugLoc dl = MI->getDebugLoc();
  ASSERT_SORTED(PopTable);
  if (StackTop == 0)
    report_fatal_error("Cannot pop empty stack!");
  // Update the simulated stack state first: the top slot's register no longer
  // lives anywhere.
  RegMap[Stack[--StackTop]] = ~0;     // Update state

  // Check to see if there is a popping version of this instruction...
  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
  if (Opcode != -1) {
    // Fold the pop into the instruction by switching to its popping form.
    I->setDesc(TII->get(Opcode));
    if (Opcode == X86::UCOM_FPPr)
      // UCOM_FPPr pops both operands implicitly, so the explicit register
      // operand of the non-popping form must be dropped.
      I->RemoveOperand(0);
  } else {    // Insert an explicit pop
    // "fstp st(0)" discards the top of stack, leaving I on the new pop.
    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
  }
}
809 | ||
810 | /// freeStackSlotAfter - Free the specified register from the register stack, so | |
811 | /// that it is no longer in a register. If the register is currently at the top | |
812 | /// of the stack, we just pop the current instruction, otherwise we store the | |
813 | /// current top-of-stack into the specified slot, then pop the top of stack. | |
814 | void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { | |
815 | if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. | |
816 | popStackAfter(I); | |
817 | return; | |
818 | } | |
819 | ||
820 | // Otherwise, store the top of stack into the dead slot, killing the operand | |
821 | // without having to add in an explicit xchg then pop. | |
822 | // | |
823 | I = freeStackSlotBefore(++I, FPRegNo); | |
824 | } | |
825 | ||
826 | /// freeStackSlotBefore - Free the specified register without trying any | |
827 | /// folding. | |
828 | MachineBasicBlock::iterator | |
829 | FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { | |
830 | unsigned STReg = getSTReg(FPRegNo); | |
831 | unsigned OldSlot = getSlot(FPRegNo); | |
832 | unsigned TopReg = Stack[StackTop-1]; | |
833 | Stack[OldSlot] = TopReg; | |
834 | RegMap[TopReg] = OldSlot; | |
835 | RegMap[FPRegNo] = ~0; | |
836 | Stack[--StackTop] = ~0; | |
85aaf69f SL |
837 | return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)) |
838 | .addReg(STReg) | |
839 | .getInstr(); | |
223e47cc LB |
840 | } |
841 | ||
/// adjustLiveRegs - Kill and revive registers such that exactly the FP
/// registers with a bit in Mask are live.
void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
  // Partition the currently-live stack registers: Defs ends up holding the
  // registers in Mask that are not yet live (need an imp-def), Kills the
  // registers that are live but not wanted.
  unsigned Defs = Mask;
  unsigned Kills = 0;
  for (unsigned i = 0; i < StackTop; ++i) {
    unsigned RegNo = Stack[i];
    if (!(Defs & (1 << RegNo)))
      // This register is live, but we don't want it.
      Kills |= (1 << RegNo);
    else
      // We don't need to imp-def this live register.
      Defs &= ~(1 << RegNo);
  }
  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");

  // Produce implicit-defs for free by using killed registers: rename a killed
  // register's stack slot to carry the newly-defined register instead of
  // popping one and pushing the other.
  while (Kills && Defs) {
    unsigned KReg = countTrailingZeros(Kills);
    unsigned DReg = countTrailingZeros(Defs);
    DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
    std::swap(RegMap[KReg], RegMap[DReg]);
    Kills &= ~(1 << KReg);
    Defs &= ~(1 << DReg);
  }

  // Kill registers by popping.  Pops are emitted after the previous
  // instruction (I2), and only work while the doomed register is on top.
  if (Kills && I != MBB->begin()) {
    MachineBasicBlock::iterator I2 = std::prev(I);
    while (StackTop) {
      unsigned KReg = getStackEntry(0);
      if (!(Kills & (1 << KReg)))
        break;
      DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
      popStackAfter(I2);
      Kills &= ~(1 << KReg);
    }
  }

  // Manually kill the rest (registers buried below a live one).
  while (Kills) {
    unsigned KReg = countTrailingZeros(Kills);
    DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
    freeStackSlotBefore(I, KReg);
    Kills &= ~(1 << KReg);
  }

  // Load zeros for all the imp-defs: every FP register must be explicitly
  // defined on the x87 stack, so push a 0.0 for each.
  while(Defs) {
    unsigned DReg = countTrailingZeros(Defs);
    DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
    pushReg(DReg);
    Defs &= ~(1 << DReg);
  }

  // Now we should have the correct registers live.
  DEBUG(dumpStack());
  assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
}
903 | ||
904 | /// shuffleStackTop - emit fxch instructions before I to shuffle the top | |
905 | /// FixCount entries into the order given by FixStack. | |
906 | /// FIXME: Is there a better algorithm than insertion sort? | |
907 | void FPS::shuffleStackTop(const unsigned char *FixStack, | |
908 | unsigned FixCount, | |
909 | MachineBasicBlock::iterator I) { | |
910 | // Move items into place, starting from the desired stack bottom. | |
911 | while (FixCount--) { | |
912 | // Old register at position FixCount. | |
913 | unsigned OldReg = getStackEntry(FixCount); | |
914 | // Desired register at position FixCount. | |
915 | unsigned Reg = FixStack[FixCount]; | |
916 | if (Reg == OldReg) | |
917 | continue; | |
918 | // (Reg st0) (OldReg st0) = (Reg OldReg st0) | |
919 | moveToTop(Reg, I); | |
920 | if (FixCount > 0) | |
921 | moveToTop(OldReg, I); | |
922 | } | |
923 | DEBUG(dumpStack()); | |
924 | } | |
925 | ||
926 | ||
927 | //===----------------------------------------------------------------------===// | |
928 | // Instruction transformation implementation | |
929 | //===----------------------------------------------------------------------===// | |
930 | ||
1a4d82fc JJ |
931 | void FPS::handleCall(MachineBasicBlock::iterator &I) { |
932 | unsigned STReturns = 0; | |
933 | ||
934 | for (const auto &MO : I->operands()) { | |
935 | if (!MO.isReg()) | |
936 | continue; | |
937 | ||
938 | unsigned R = MO.getReg() - X86::FP0; | |
939 | ||
940 | if (R < 8) { | |
941 | assert(MO.isDef() && MO.isImplicit()); | |
942 | STReturns |= 1 << R; | |
943 | } | |
944 | } | |
945 | ||
946 | unsigned N = CountTrailingOnes_32(STReturns); | |
947 | ||
948 | // FP registers used for function return must be consecutive starting at | |
949 | // FP0. | |
950 | assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); | |
951 | ||
952 | for (unsigned I = 0; I < N; ++I) | |
953 | pushReg(N - I - 1); | |
954 | } | |
955 | ||
223e47cc LB |
956 | /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> |
957 | /// | |
958 | void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { | |
959 | MachineInstr *MI = I; | |
960 | unsigned DestReg = getFPReg(MI->getOperand(0)); | |
961 | ||
962 | // Change from the pseudo instruction to the concrete instruction. | |
963 | MI->RemoveOperand(0); // Remove the explicit ST(0) operand | |
964 | MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); | |
965 | ||
966 | // Result gets pushed on the stack. | |
967 | pushReg(DestReg); | |
968 | } | |
969 | ||
/// handleOneArgFP - fst <mem>, ST(0)
///
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
  unsigned NumOps = MI->getDesc().getNumOperands();
  // Either a memory form (address operands + FP source) or the one-operand
  // ftst form.
  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
         "Can only handle fst* & ftst instructions!");

  // Is this the last use of the source register?
  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);

  // FISTP64m is strange because there is no non-popping version.
  // If we have one _and_ we don't want to pop the operand, duplicate the value
  // on the stack instead of moving it.  This ensures that popping the value is
  // always ok.
  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
  //
  if (!KillsSrc &&
      (MI->getOpcode() == X86::IST_Fp64m32 ||
       MI->getOpcode() == X86::ISTT_Fp16m32 ||
       MI->getOpcode() == X86::ISTT_Fp32m32 ||
       MI->getOpcode() == X86::ISTT_Fp64m32 ||
       MI->getOpcode() == X86::IST_Fp64m64 ||
       MI->getOpcode() == X86::ISTT_Fp16m64 ||
       MI->getOpcode() == X86::ISTT_Fp32m64 ||
       MI->getOpcode() == X86::ISTT_Fp64m64 ||
       MI->getOpcode() == X86::IST_Fp64m80 ||
       MI->getOpcode() == X86::ISTT_Fp16m80 ||
       MI->getOpcode() == X86::ISTT_Fp32m80 ||
       MI->getOpcode() == X86::ISTT_Fp64m80 ||
       MI->getOpcode() == X86::ST_FpP80m)) {
    // Copy the live value into the scratch register's slot on top of the
    // stack so the popping store consumes the copy, not the original.
    duplicateToTop(Reg, ScratchFPReg, I);
  } else {
    moveToTop(Reg, I);            // Move to the top of the stack...
  }

  // Convert from the pseudo instruction to the concrete instruction.
  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));

  if (MI->getOpcode() == X86::IST_FP64m ||
      MI->getOpcode() == X86::ISTT_FP16m ||
      MI->getOpcode() == X86::ISTT_FP32m ||
      MI->getOpcode() == X86::ISTT_FP64m ||
      MI->getOpcode() == X86::ST_FP80m) {
    // These concrete forms always pop; mirror that in the simulated stack.
    if (StackTop == 0)
      report_fatal_error("Stack empty??");
    --StackTop;
  } else if (KillsSrc) {          // Last use of operand?
    popStackAfter(I);
  }
}
1023 | ||
1024 | ||
1025 | /// handleOneArgFPRW: Handle instructions that read from the top of stack and | |
1026 | /// replace the value with a newly computed value. These instructions may have | |
1027 | /// non-fp operands after their FP operands. | |
1028 | /// | |
1029 | /// Examples: | |
1030 | /// R1 = fchs R2 | |
1031 | /// R1 = fadd R2, [mem] | |
1032 | /// | |
1033 | void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { | |
1034 | MachineInstr *MI = I; | |
1035 | #ifndef NDEBUG | |
1036 | unsigned NumOps = MI->getDesc().getNumOperands(); | |
1037 | assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); | |
1038 | #endif | |
1039 | ||
1040 | // Is this the last use of the source register? | |
1041 | unsigned Reg = getFPReg(MI->getOperand(1)); | |
1042 | bool KillsSrc = MI->killsRegister(X86::FP0+Reg); | |
1043 | ||
1044 | if (KillsSrc) { | |
223e47cc LB |
1045 | // If this is the last use of the source register, just make sure it's on |
1046 | // the top of the stack. | |
1047 | moveToTop(Reg, I); | |
1048 | if (StackTop == 0) | |
1049 | report_fatal_error("Stack cannot be empty!"); | |
1050 | --StackTop; | |
1051 | pushReg(getFPReg(MI->getOperand(0))); | |
1052 | } else { | |
1053 | // If this is not the last use of the source register, _copy_ it to the top | |
1054 | // of the stack. | |
1055 | duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); | |
1056 | } | |
1057 | ||
1058 | // Change from the pseudo instruction to the concrete instruction. | |
1059 | MI->RemoveOperand(1); // Drop the source operand. | |
1060 | MI->RemoveOperand(0); // Drop the destination operand. | |
1061 | MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); | |
1062 | } | |
1063 | ||
1064 | ||
1065 | //===----------------------------------------------------------------------===// | |
1066 | // Define tables of various ways to map pseudo instructions | |
1067 | // | |
1068 | ||
// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
// (result overwrites ST(0); operand order matches the pseudo's order).
static const TableEntry ForwardST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r },
  { X86::ADD_Fp64  , X86::ADD_FST0r },
  { X86::ADD_Fp80  , X86::ADD_FST0r },
  { X86::DIV_Fp32  , X86::DIV_FST0r },
  { X86::DIV_Fp64  , X86::DIV_FST0r },
  { X86::DIV_Fp80  , X86::DIV_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r },
  { X86::MUL_Fp64  , X86::MUL_FST0r },
  { X86::MUL_Fp80  , X86::MUL_FST0r },
  { X86::SUB_Fp32  , X86::SUB_FST0r },
  { X86::SUB_Fp64  , X86::SUB_FST0r },
  { X86::SUB_Fp80  , X86::SUB_FST0r },
};

// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
// (result overwrites ST(0); operands swapped, so non-commutative ops use the
// reversed concrete instruction).
static const TableEntry ReverseST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FST0r },
  { X86::DIV_Fp64  , X86::DIVR_FST0r },
  { X86::DIV_Fp80  , X86::DIVR_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FST0r },
  { X86::SUB_Fp64  , X86::SUBR_FST0r },
  { X86::SUB_Fp80  , X86::SUBR_FST0r },
};

// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
// (result overwrites ST(i); hardware computes ST(0) op ST(i), which is the
// reverse of the register-form mnemonic, hence the R forms below).
static const TableEntry ForwardSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
};

// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
static const TableEntry ReverseSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0 },
  { X86::ADD_Fp64  , X86::ADD_FrST0 },
  { X86::ADD_Fp80  , X86::ADD_FrST0 },
  { X86::DIV_Fp32  , X86::DIV_FrST0 },
  { X86::DIV_Fp64  , X86::DIV_FrST0 },
  { X86::DIV_Fp80  , X86::DIV_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0 },
  { X86::MUL_Fp64  , X86::MUL_FrST0 },
  { X86::MUL_Fp80  , X86::MUL_FrST0 },
  { X86::SUB_Fp32  , X86::SUB_FrST0 },
  { X86::SUB_Fp64  , X86::SUB_FrST0 },
  { X86::SUB_Fp80  , X86::SUB_FrST0 },
};
1132 | ||
1133 | ||
/// handleTwoArgFP - Handle instructions like FADD and friends which are
/// virtual instructions which need to be simplified and possibly transformed.
///
/// Result: ST(0) = fsub  ST(0), ST(i)
///         ST(i) = fsub  ST(0), ST(i)
///         ST(0) = fsubr ST(0), ST(i)
///         ST(i) = fsubr ST(0), ST(i)
///
void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
  MachineInstr *MI = I;

  unsigned NumOperands = MI->getDesc().getNumOperands();
  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
  unsigned Dest = getFPReg(MI->getOperand(0));
  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
  DebugLoc dl = MI->getDebugLoc();

  unsigned TOS = getStackEntry(0);

  // One of our operands must be on the top of the stack.  If neither is yet,
  // we need to move one.
  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
    // We can choose to move either operand to the top of the stack.  If one
    // of the operands is killed by this instruction, we want that one so that
    // we can update right on top of the old version.
    if (KillsOp0) {
      moveToTop(Op0, I);         // Move dead operand to TOS.
      TOS = Op0;
    } else if (KillsOp1) {
      moveToTop(Op1, I);
      TOS = Op1;
    } else {
      // All of the operands are live after this instruction executes, so we
      // cannot update on top of any operand.  Because of this, we must
      // duplicate one of the stack elements to the top.  It doesn't matter
      // which one we pick.
      //
      duplicateToTop(Op0, Dest, I);
      Op0 = TOS = Dest;
      KillsOp0 = true;
    }
  } else if (!KillsOp0 && !KillsOp1) {
    // If we DO have one of our operands at the top of the stack, but we don't
    // have a dead operand, we must duplicate one of the operands to a new
    // slot on the stack.
    duplicateToTop(Op0, Dest, I);
    Op0 = TOS = Dest;
    KillsOp0 = true;
  }

  // Now we know that one of our operands is on the top of the stack, and at
  // least one of our operands is killed by this instruction.
  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
         "Stack conditions not set up right!");

  // We decide which form to use based on what is on the top of the stack, and
  // which operand is killed by this instruction.
  const TableEntry *InstTable;
  bool isForward = TOS == Op0;
  // Update ST(0) when the non-TOS operand survives (its slot can't be
  // clobbered); otherwise update the non-TOS slot ST(i).
  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
  if (updateST0) {
    if (isForward)
      InstTable = ForwardST0Table;
    else
      InstTable = ReverseST0Table;
  } else {
    if (isForward)
      InstTable = ForwardSTiTable;
    else
      InstTable = ReverseSTiTable;
  }

  // NOTE(review): all four tables have the same number of entries, so using
  // ForwardST0Table's length for whichever table was selected is safe — keep
  // the table sizes in sync if entries are ever added.
  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
                      MI->getOpcode());
  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");

  // NotTOS - The register which is not on the top of stack...
  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;

  // Replace the old instruction with a new instruction
  MBB->remove(I++);
  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));

  // If both operands are killed, pop one off of the stack in addition to
  // overwriting the other one.
  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
    assert(!updateST0 && "Should have updated other operand!");
    popStackAfter(I);   // Pop the top of stack
  }

  // Update stack information so that we know the destination register is now
  // on the stack.
  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
  assert(UpdatedSlot < StackTop && Dest < 7);
  Stack[UpdatedSlot] = Dest;
  RegMap[Dest] = UpdatedSlot;
  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
}
1237 | ||
1238 | /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP | |
1239 | /// register arguments and no explicit destinations. | |
1240 | /// | |
1241 | void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { | |
1242 | ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); | |
1243 | ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); | |
1244 | MachineInstr *MI = I; | |
1245 | ||
1246 | unsigned NumOperands = MI->getDesc().getNumOperands(); | |
1247 | assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); | |
1248 | unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); | |
1249 | unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); | |
1250 | bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); | |
1251 | bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); | |
1252 | ||
1253 | // Make sure the first operand is on the top of stack, the other one can be | |
1254 | // anywhere. | |
1255 | moveToTop(Op0, I); | |
1256 | ||
1257 | // Change from the pseudo instruction to the concrete instruction. | |
1258 | MI->getOperand(0).setReg(getSTReg(Op1)); | |
1259 | MI->RemoveOperand(1); | |
1260 | MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); | |
1261 | ||
1262 | // If any of the operands are killed by this instruction, free them. | |
1263 | if (KillsOp0) freeStackSlotAfter(I, Op0); | |
1264 | if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); | |
1265 | } | |
1266 | ||
/// handleCondMovFP - Handle two address conditional move instructions.  These
/// instructions move a st(i) register to st(0) iff a condition is true.
/// These instructions require that the first operand is at the top of the
/// stack, but otherwise don't modify the stack at all.
void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;

  // Operand 0 is the tied destination/first source; operand 2 is the value
  // conditionally moved in.
  unsigned Op0 = getFPReg(MI->getOperand(0));
  unsigned Op1 = getFPReg(MI->getOperand(2));
  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);

  // The first operand *must* be on the top of the stack.
  moveToTop(Op0, I);

  // Change the second operand to the stack register that the operand is in.
  // Change from the pseudo instruction to the concrete instruction.
  // Note: after removing operands 0 and 1, the old operand 2 is renumbered to
  // operand 0, which is what setReg below targets.
  MI->RemoveOperand(0);
  MI->RemoveOperand(1);
  MI->getOperand(0).setReg(getSTReg(Op1));
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));

  // If we kill the second operand, make sure to pop it from the stack.
  if (Op0 != Op1 && KillsOp1) {
    // Get this value off of the register stack.
    freeStackSlotAfter(I, Op1);
  }
}
1294 | ||
1295 | ||
1296 | /// handleSpecialFP - Handle special instructions which behave unlike other | |
1297 | /// floating point instructions. This is primarily intended for use by pseudo | |
1298 | /// instructions. | |
1299 | /// | |
1a4d82fc JJ |
1300 | void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { |
1301 | MachineInstr *MI = Inst; | |
1302 | ||
1303 | if (MI->isCall()) { | |
1304 | handleCall(Inst); | |
1305 | return; | |
1306 | } | |
1307 | ||
223e47cc LB |
1308 | switch (MI->getOpcode()) { |
1309 | default: llvm_unreachable("Unknown SpecialFP instruction!"); | |
1310 | case TargetOpcode::COPY: { | |
1311 | // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. | |
1312 | const MachineOperand &MO1 = MI->getOperand(1); | |
1313 | const MachineOperand &MO0 = MI->getOperand(0); | |
223e47cc LB |
1314 | bool KillsSrc = MI->killsRegister(MO1.getReg()); |
1315 | ||
223e47cc LB |
1316 | // FP <- FP copy. |
1317 | unsigned DstFP = getFPReg(MO0); | |
1318 | unsigned SrcFP = getFPReg(MO1); | |
1319 | assert(isLive(SrcFP) && "Cannot copy dead register"); | |
1320 | if (KillsSrc) { | |
1321 | // If the input operand is killed, we can just change the owner of the | |
1322 | // incoming stack slot into the result. | |
1323 | unsigned Slot = getSlot(SrcFP); | |
1324 | Stack[Slot] = DstFP; | |
1325 | RegMap[DstFP] = Slot; | |
1326 | } else { | |
1327 | // For COPY we just duplicate the specified value to a new stack slot. | |
1328 | // This could be made better, but would require substantial changes. | |
1a4d82fc | 1329 | duplicateToTop(SrcFP, DstFP, Inst); |
223e47cc LB |
1330 | } |
1331 | break; | |
1332 | } | |
1333 | ||
1334 | case TargetOpcode::IMPLICIT_DEF: { | |
1335 | // All FP registers must be explicitly defined, so load a 0 instead. | |
1336 | unsigned Reg = MI->getOperand(0).getReg() - X86::FP0; | |
1337 | DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n'); | |
1a4d82fc | 1338 | BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0)); |
223e47cc LB |
1339 | pushReg(Reg); |
1340 | break; | |
1341 | } | |
1342 | ||
223e47cc LB |
1343 | case TargetOpcode::INLINEASM: { |
1344 | // The inline asm MachineInstr currently only *uses* FP registers for the | |
1345 | // 'f' constraint. These should be turned into the current ST(x) register | |
1346 | // in the machine instr. | |
1347 | // | |
1348 | // There are special rules for x87 inline assembly. The compiler must know | |
1349 | // exactly how many registers are popped and pushed implicitly by the asm. | |
1350 | // Otherwise it is not possible to restore the stack state after the inline | |
1351 | // asm. | |
1352 | // | |
1353 | // There are 3 kinds of input operands: | |
1354 | // | |
1355 | // 1. Popped inputs. These must appear at the stack top in ST0-STn. A | |
1356 | // popped input operand must be in a fixed stack slot, and it is either | |
1357 | // tied to an output operand, or in the clobber list. The MI has ST use | |
1358 | // and def operands for these inputs. | |
1359 | // | |
1360 | // 2. Fixed inputs. These inputs appear in fixed stack slots, but are | |
1361 | // preserved by the inline asm. The fixed stack slots must be STn-STm | |
1362 | // following the popped inputs. A fixed input operand cannot be tied to | |
1363 | // an output or appear in the clobber list. The MI has ST use operands | |
1364 | // and no defs for these inputs. | |
1365 | // | |
1366 | // 3. Preserved inputs. These inputs use the "f" constraint which is | |
1367 | // represented as an FP register. The inline asm won't change these | |
1368 | // stack slots. | |
1369 | // | |
1370 | // Outputs must be in ST registers, FP outputs are not allowed. Clobbered | |
1371 | // registers do not count as output operands. The inline asm changes the | |
1372 | // stack as if it popped all the popped inputs and then pushed all the | |
1373 | // output operands. | |
1374 | ||
1375 | // Scan the assembly for ST registers used, defined and clobbered. We can | |
1376 | // only tell clobbers from defs by looking at the asm descriptor. | |
1377 | unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; | |
1378 | unsigned NumOps = 0; | |
1a4d82fc JJ |
1379 | SmallSet<unsigned, 1> FRegIdx; |
1380 | unsigned RCID; | |
1381 | ||
223e47cc LB |
1382 | for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); |
1383 | i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { | |
1384 | unsigned Flags = MI->getOperand(i).getImm(); | |
1a4d82fc | 1385 | |
223e47cc LB |
1386 | NumOps = InlineAsm::getNumOperandRegisters(Flags); |
1387 | if (NumOps != 1) | |
1388 | continue; | |
1389 | const MachineOperand &MO = MI->getOperand(i + 1); | |
1390 | if (!MO.isReg()) | |
1391 | continue; | |
1a4d82fc | 1392 | unsigned STReg = MO.getReg() - X86::FP0; |
223e47cc LB |
1393 | if (STReg >= 8) |
1394 | continue; | |
1395 | ||
1a4d82fc JJ |
1396 | // If the flag has a register class constraint, this must be an operand |
1397 | // with constraint "f". Record its index and continue. | |
1398 | if (InlineAsm::hasRegClassConstraint(Flags, RCID)) { | |
1399 | FRegIdx.insert(i + 1); | |
1400 | continue; | |
1401 | } | |
1402 | ||
223e47cc LB |
1403 | switch (InlineAsm::getKind(Flags)) { |
1404 | case InlineAsm::Kind_RegUse: | |
1405 | STUses |= (1u << STReg); | |
1406 | break; | |
1407 | case InlineAsm::Kind_RegDef: | |
1408 | case InlineAsm::Kind_RegDefEarlyClobber: | |
1409 | STDefs |= (1u << STReg); | |
1410 | if (MO.isDead()) | |
1411 | STDeadDefs |= (1u << STReg); | |
1412 | break; | |
1413 | case InlineAsm::Kind_Clobber: | |
1414 | STClobbers |= (1u << STReg); | |
1415 | break; | |
1416 | default: | |
1417 | break; | |
1418 | } | |
1419 | } | |
1420 | ||
1421 | if (STUses && !isMask_32(STUses)) | |
1422 | MI->emitError("fixed input regs must be last on the x87 stack"); | |
1423 | unsigned NumSTUses = CountTrailingOnes_32(STUses); | |
1424 | ||
1425 | // Defs must be contiguous from the stack top. ST0-STn. | |
1426 | if (STDefs && !isMask_32(STDefs)) { | |
1427 | MI->emitError("output regs must be last on the x87 stack"); | |
1428 | STDefs = NextPowerOf2(STDefs) - 1; | |
1429 | } | |
1430 | unsigned NumSTDefs = CountTrailingOnes_32(STDefs); | |
1431 | ||
1432 | // So must the clobbered stack slots. ST0-STm, m >= n. | |
1433 | if (STClobbers && !isMask_32(STDefs | STClobbers)) | |
1434 | MI->emitError("clobbers must be last on the x87 stack"); | |
1435 | ||
1436 | // Popped inputs are the ones that are also clobbered or defined. | |
1437 | unsigned STPopped = STUses & (STDefs | STClobbers); | |
1438 | if (STPopped && !isMask_32(STPopped)) | |
1439 | MI->emitError("implicitly popped regs must be last on the x87 stack"); | |
1440 | unsigned NumSTPopped = CountTrailingOnes_32(STPopped); | |
1441 | ||
1442 | DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " | |
1443 | << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); | |
1444 | ||
1a4d82fc JJ |
1445 | #ifndef NDEBUG |
1446 | // If any input operand uses constraint "f", all output register | |
1447 | // constraints must be early-clobber defs. | |
1448 | for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) | |
1449 | if (FRegIdx.count(I)) { | |
1450 | assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 && | |
1451 | "Operands with constraint \"f\" cannot overlap with defs"); | |
1452 | } | |
1453 | #endif | |
1454 | ||
1455 | // Collect all FP registers (register operands with constraints "t", "u", | |
1456 | // and "f") to kill after the instruction. | |
223e47cc | 1457 | unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; |
223e47cc LB |
1458 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
1459 | MachineOperand &Op = MI->getOperand(i); | |
1460 | if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) | |
1461 | continue; | |
223e47cc | 1462 | unsigned FPReg = getFPReg(Op); |
223e47cc LB |
1463 | |
1464 | // If we kill this operand, make sure to pop it from the stack after the | |
1465 | // asm. We just remember it for now, and pop them all off at the end in | |
1466 | // a batch. | |
1a4d82fc | 1467 | if (Op.isUse() && Op.isKill()) |
223e47cc LB |
1468 | FPKills |= 1U << FPReg; |
1469 | } | |
1470 | ||
1a4d82fc JJ |
1471 | // Do not include registers that are implicitly popped by defs/clobbers. |
1472 | FPKills &= ~(STDefs | STClobbers); | |
223e47cc LB |
1473 | |
1474 | // Now we can rearrange the live registers to match what was requested. | |
1a4d82fc JJ |
1475 | unsigned char STUsesArray[8]; |
1476 | ||
1477 | for (unsigned I = 0; I < NumSTUses; ++I) | |
1478 | STUsesArray[I] = I; | |
1479 | ||
1480 | shuffleStackTop(STUsesArray, NumSTUses, Inst); | |
223e47cc LB |
1481 | DEBUG({dbgs() << "Before asm: "; dumpStack();}); |
1482 | ||
1483 | // With the stack layout fixed, rewrite the FP registers. | |
1484 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
1485 | MachineOperand &Op = MI->getOperand(i); | |
1486 | if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) | |
1487 | continue; | |
1a4d82fc | 1488 | |
223e47cc | 1489 | unsigned FPReg = getFPReg(Op); |
1a4d82fc JJ |
1490 | |
1491 | if (FRegIdx.count(i)) | |
1492 | // Operand with constraint "f". | |
1493 | Op.setReg(getSTReg(FPReg)); | |
1494 | else | |
1495 | // Operand with a single register class constraint ("t" or "u"). | |
1496 | Op.setReg(X86::ST0 + FPReg); | |
223e47cc LB |
1497 | } |
1498 | ||
1499 | // Simulate the inline asm popping its inputs and pushing its outputs. | |
1500 | StackTop -= NumSTPopped; | |
1501 | ||
223e47cc | 1502 | for (unsigned i = 0; i < NumSTDefs; ++i) |
1a4d82fc | 1503 | pushReg(NumSTDefs - i - 1); |
223e47cc LB |
1504 | |
1505 | // If this asm kills any FP registers (is the last use of them) we must | |
1506 | // explicitly emit pop instructions for them. Do this now after the asm has | |
1507 | // executed so that the ST(x) numbers are not off (which would happen if we | |
1508 | // did this inline with operand rewriting). | |
1509 | // | |
1510 | // Note: this might be a non-optimal pop sequence. We might be able to do | |
1511 | // better by trying to pop in stack order or something. | |
1512 | while (FPKills) { | |
1a4d82fc | 1513 | unsigned FPReg = countTrailingZeros(FPKills); |
223e47cc | 1514 | if (isLive(FPReg)) |
1a4d82fc | 1515 | freeStackSlotAfter(Inst, FPReg); |
223e47cc LB |
1516 | FPKills &= ~(1U << FPReg); |
1517 | } | |
1a4d82fc | 1518 | |
223e47cc LB |
1519 | // Don't delete the inline asm! |
1520 | return; | |
1521 | } | |
1522 | ||
1523 | case X86::WIN_FTOL_32: | |
1524 | case X86::WIN_FTOL_64: { | |
1525 | // Push the operand into ST0. | |
1526 | MachineOperand &Op = MI->getOperand(0); | |
1527 | assert(Op.isUse() && Op.isReg() && | |
1528 | Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); | |
1529 | unsigned FPReg = getFPReg(Op); | |
1530 | if (Op.isKill()) | |
1a4d82fc | 1531 | moveToTop(FPReg, Inst); |
223e47cc | 1532 | else |
1a4d82fc | 1533 | duplicateToTop(FPReg, FPReg, Inst); |
223e47cc LB |
1534 | |
1535 | // Emit the call. This will pop the operand. | |
1a4d82fc | 1536 | BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) |
223e47cc LB |
1537 | .addExternalSymbol("_ftol2") |
1538 | .addReg(X86::ST0, RegState::ImplicitKill) | |
1a4d82fc | 1539 | .addReg(X86::ECX, RegState::ImplicitDefine) |
223e47cc LB |
1540 | .addReg(X86::EAX, RegState::Define | RegState::Implicit) |
1541 | .addReg(X86::EDX, RegState::Define | RegState::Implicit) | |
1542 | .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); | |
1543 | --StackTop; | |
1544 | ||
1545 | break; | |
1546 | } | |
1547 | ||
1a4d82fc JJ |
1548 | case X86::RETQ: |
1549 | case X86::RETL: | |
1550 | case X86::RETIL: | |
1551 | case X86::RETIQ: | |
223e47cc LB |
1552 | // If RET has an FP register use operand, pass the first one in ST(0) and |
1553 | // the second one in ST(1). | |
1554 | ||
1555 | // Find the register operands. | |
1556 | unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; | |
1557 | unsigned LiveMask = 0; | |
1558 | ||
1559 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
1560 | MachineOperand &Op = MI->getOperand(i); | |
1561 | if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) | |
1562 | continue; | |
1563 | // FP Register uses must be kills unless there are two uses of the same | |
1564 | // register, in which case only one will be a kill. | |
1565 | assert(Op.isUse() && | |
1566 | (Op.isKill() || // Marked kill. | |
1567 | getFPReg(Op) == FirstFPRegOp || // Second instance. | |
1568 | MI->killsRegister(Op.getReg())) && // Later use is marked kill. | |
1569 | "Ret only defs operands, and values aren't live beyond it"); | |
1570 | ||
1571 | if (FirstFPRegOp == ~0U) | |
1572 | FirstFPRegOp = getFPReg(Op); | |
1573 | else { | |
1574 | assert(SecondFPRegOp == ~0U && "More than two fp operands!"); | |
1575 | SecondFPRegOp = getFPReg(Op); | |
1576 | } | |
1577 | LiveMask |= (1 << getFPReg(Op)); | |
1578 | ||
1579 | // Remove the operand so that later passes don't see it. | |
1580 | MI->RemoveOperand(i); | |
1581 | --i, --e; | |
1582 | } | |
1583 | ||
1584 | // We may have been carrying spurious live-ins, so make sure only the returned | |
1585 | // registers are left live. | |
1586 | adjustLiveRegs(LiveMask, MI); | |
1587 | if (!LiveMask) return; // Quick check to see if any are possible. | |
1588 | ||
1589 | // There are only four possibilities here: | |
1590 | // 1) we are returning a single FP value. In this case, it has to be in | |
1591 | // ST(0) already, so just declare success by removing the value from the | |
1592 | // FP Stack. | |
1593 | if (SecondFPRegOp == ~0U) { | |
1594 | // Assert that the top of stack contains the right FP register. | |
1595 | assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && | |
1596 | "Top of stack not the right register for RET!"); | |
1597 | ||
1598 | // Ok, everything is good, mark the value as not being on the stack | |
1599 | // anymore so that our assertion about the stack being empty at end of | |
1600 | // block doesn't fire. | |
1601 | StackTop = 0; | |
1602 | return; | |
1603 | } | |
1604 | ||
1605 | // Otherwise, we are returning two values: | |
1606 | // 2) If returning the same value for both, we only have one thing in the FP | |
1607 | // stack. Consider: RET FP1, FP1 | |
1608 | if (StackTop == 1) { | |
1609 | assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& | |
1610 | "Stack misconfiguration for RET!"); | |
1611 | ||
1612 | // Duplicate the TOS so that we return it twice. Just pick some other FPx | |
1613 | // register to hold it. | |
1a4d82fc | 1614 | unsigned NewReg = ScratchFPReg; |
223e47cc LB |
1615 | duplicateToTop(FirstFPRegOp, NewReg, MI); |
1616 | FirstFPRegOp = NewReg; | |
1617 | } | |
1618 | ||
1619 | /// Okay we know we have two different FPx operands now: | |
1620 | assert(StackTop == 2 && "Must have two values live!"); | |
1621 | ||
1622 | /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently | |
1623 | /// in ST(1). In this case, emit an fxch. | |
1624 | if (getStackEntry(0) == SecondFPRegOp) { | |
1625 | assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); | |
1626 | moveToTop(FirstFPRegOp, MI); | |
1627 | } | |
1628 | ||
1629 | /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in | |
1630 | /// ST(1). Just remove both from our understanding of the stack and return. | |
1631 | assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); | |
1632 | assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); | |
1633 | StackTop = 0; | |
1634 | return; | |
1635 | } | |
1636 | ||
1a4d82fc | 1637 | Inst = MBB->erase(Inst); // Remove the pseudo instruction |
223e47cc LB |
1638 | |
1639 | // We want to leave I pointing to the previous instruction, but what if we | |
1640 | // just erased the first instruction? | |
1a4d82fc | 1641 | if (Inst == MBB->begin()) { |
223e47cc | 1642 | DEBUG(dbgs() << "Inserting dummy KILL\n"); |
1a4d82fc | 1643 | Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL)); |
223e47cc | 1644 | } else |
1a4d82fc JJ |
1645 | --Inst; |
1646 | } | |
1647 | ||
1648 | void FPS::setKillFlags(MachineBasicBlock &MBB) const { | |
1649 | const TargetRegisterInfo *TRI = | |
1650 | MBB.getParent()->getSubtarget().getRegisterInfo(); | |
1651 | LivePhysRegs LPR(TRI); | |
1652 | ||
1653 | LPR.addLiveOuts(&MBB); | |
1654 | ||
1655 | for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); | |
1656 | I != E; ++I) { | |
1657 | if (I->isDebugValue()) | |
1658 | continue; | |
1659 | ||
1660 | std::bitset<8> Defs; | |
1661 | SmallVector<MachineOperand *, 2> Uses; | |
1662 | MachineInstr &MI = *I; | |
1663 | ||
1664 | for (auto &MO : I->operands()) { | |
1665 | if (!MO.isReg()) | |
1666 | continue; | |
1667 | ||
1668 | unsigned Reg = MO.getReg() - X86::FP0; | |
1669 | ||
1670 | if (Reg >= 8) | |
1671 | continue; | |
1672 | ||
1673 | if (MO.isDef()) { | |
1674 | Defs.set(Reg); | |
1675 | if (!LPR.contains(MO.getReg())) | |
1676 | MO.setIsDead(); | |
1677 | } else | |
1678 | Uses.push_back(&MO); | |
1679 | } | |
1680 | ||
1681 | for (auto *MO : Uses) | |
1682 | if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg())) | |
1683 | MO->setIsKill(); | |
1684 | ||
1685 | LPR.stepBackward(MI); | |
1686 | } | |
223e47cc | 1687 | } |