]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file contains the Base ARM implementation of the TargetInstrInfo class. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | #include "ARMBaseInstrInfo.h" | |
15 | #include "ARM.h" | |
16 | #include "ARMBaseRegisterInfo.h" | |
17 | #include "ARMConstantPoolValue.h" | |
18 | #include "ARMHazardRecognizer.h" | |
19 | #include "ARMMachineFunctionInfo.h" | |
20 | #include "MCTargetDesc/ARMAddressingModes.h" | |
970d7e83 | 21 | #include "llvm/ADT/STLExtras.h" |
223e47cc LB |
22 | #include "llvm/CodeGen/LiveVariables.h" |
23 | #include "llvm/CodeGen/MachineConstantPool.h" | |
24 | #include "llvm/CodeGen/MachineFrameInfo.h" | |
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |
26 | #include "llvm/CodeGen/MachineJumpTableInfo.h" | |
27 | #include "llvm/CodeGen/MachineMemOperand.h" | |
28 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
29 | #include "llvm/CodeGen/SelectionDAGNodes.h" | |
970d7e83 LB |
30 | #include "llvm/IR/Constants.h" |
31 | #include "llvm/IR/Function.h" | |
32 | #include "llvm/IR/GlobalValue.h" | |
223e47cc LB |
33 | #include "llvm/MC/MCAsmInfo.h" |
34 | #include "llvm/Support/BranchProbability.h" | |
35 | #include "llvm/Support/CommandLine.h" | |
36 | #include "llvm/Support/Debug.h" | |
37 | #include "llvm/Support/ErrorHandling.h" | |
223e47cc LB |
38 | |
39 | #define GET_INSTRINFO_CTOR | |
40 | #include "ARMGenInstrInfo.inc" | |
41 | ||
42 | using namespace llvm; | |
43 | ||
// Opt-in flag for the experimental 2-address -> 3-address conversion below
// (convertToThreeAddress); off by default.
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

// Allows widening of S-register copies to D-register copies (default on).
static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

// Instruction-count window used by the Swift partial-update heuristics.
static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
                            cl::Hidden, cl::init(12),
                            cl::desc("Clearance before partial register updates"));
56 | ||
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
/// Each entry maps a fused multiply-accumulate opcode to the pair of
/// opcodes (multiply + add/sub) it can be expanded into.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

// Static expansion table; the constructor below builds MLxEntryMap (opcode ->
// index into this table) and the MLxHazardOpcodes set from it.
static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
88 | ||
89 | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) | |
90 | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), | |
91 | Subtarget(STI) { | |
92 | for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { | |
93 | if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) | |
94 | assert(false && "Duplicated entries?"); | |
95 | MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); | |
96 | MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); | |
97 | } | |
98 | } | |
99 | ||
100 | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl | |
101 | // currently defaults to no prepass hazard recognizer. | |
102 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: | |
103 | CreateTargetHazardRecognizer(const TargetMachine *TM, | |
104 | const ScheduleDAG *DAG) const { | |
105 | if (usePreRAHazardRecognizer()) { | |
106 | const InstrItineraryData *II = TM->getInstrItineraryData(); | |
107 | return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); | |
108 | } | |
970d7e83 | 109 | return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); |
223e47cc LB |
110 | } |
111 | ||
112 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: | |
113 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, | |
114 | const ScheduleDAG *DAG) const { | |
115 | if (Subtarget.isThumb2() || Subtarget.hasVFP2()) | |
116 | return (ScheduleHazardRecognizer *) | |
117 | new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); | |
970d7e83 | 118 | return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
223e47cc LB |
119 | } |
120 | ||
/// convertToThreeAddress - Split a pre/post-indexed load or store into an
/// un-indexed memory access plus a separate base-register add/sub, when the
/// opt-in flag -enable-arm-3-addr-conv is set. Returns the first of the two
/// new instructions inserted before MBBI, or NULL if no conversion is done.
/// Kill/dead info is transferred to the new instructions via LV if provided.
MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  // Only pre- and post-indexed forms are handled.
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  // Operand layout of indexed forms: loads are (dst, writeback, base, ...),
  // stores are (writeback, src, base, ...); offset/imm/pred trail the list.
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  // Build the base-update instruction (UpdateMI) for the addressing mode.
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      // Register offset with shift: use the shifted-register add/sub form.
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  // Build the un-indexed memory instruction (MemMI). Pre-indexed: the update
  // logically comes first (accesses through WBReg); post-indexed: the memory
  // op comes first (accesses through the original BaseReg). NewMIs is always
  // ordered [second, first] — see the inserts at the bottom.
  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          // The writeback def migrates to UpdateMI, any other def to MemMI.
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  // NewMIs[1] is the first instruction in program order, NewMIs[0] the second.
  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}
269 | ||
// Branch analysis.
// Standard TargetInstrInfo contract: returns false when the block's
// terminators were understood (filling TBB/FBB/Cond), true when they could
// not be analyzed. With AllowModify set, redundant trailing branches may be
// deleted as a side effect.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip over trailing debug values to reach the last real instruction.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      // ARM conditions are two operands: the condition code immediate plus
      // the CPSR register operand.
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB =  SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them
  // for correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
369 | ||
370 | ||
371 | unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { | |
372 | MachineBasicBlock::iterator I = MBB.end(); | |
373 | if (I == MBB.begin()) return 0; | |
374 | --I; | |
375 | while (I->isDebugValue()) { | |
376 | if (I == MBB.begin()) | |
377 | return 0; | |
378 | --I; | |
379 | } | |
380 | if (!isUncondBranchOpcode(I->getOpcode()) && | |
381 | !isCondBranchOpcode(I->getOpcode())) | |
382 | return 0; | |
383 | ||
384 | // Remove the branch. | |
385 | I->eraseFromParent(); | |
386 | ||
387 | I = MBB.end(); | |
388 | ||
389 | if (I == MBB.begin()) return 1; | |
390 | --I; | |
391 | if (!isCondBranchOpcode(I->getOpcode())) | |
392 | return 1; | |
393 | ||
394 | // Remove the branch. | |
395 | I->eraseFromParent(); | |
396 | return 2; | |
397 | } | |
398 | ||
/// InsertBranch - Emit branch instruction(s) at the end of MBB implementing
/// a jump to TBB (with optional condition Cond) and fall-through/jump to FBB.
/// Returns the number of instructions inserted (1 or 2). The branch opcodes
/// are selected per sub-ISA: ARM, Thumb1, or Thumb2.
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) { // Unconditional branch?
      // Thumb unconditional branches still carry an (AL) predicate operand.
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}
437 | ||
438 | bool ARMBaseInstrInfo:: | |
439 | ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { | |
440 | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); | |
441 | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); | |
442 | return false; | |
443 | } | |
444 | ||
/// isPredicated - Return true if MI executes under a predicate other than AL.
/// For a bundle header, the bundle is considered predicated if any bundled
/// instruction carries a non-AL predicate.
bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    // Walk the instructions inside the bundle (the header itself carries no
    // predicate operand).
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}
460 | ||
/// PredicateInstruction - Convert MI into a predicated instruction using the
/// (cond-code, CPSR) pair in Pred. Returns true on success. Unconditional
/// branches are rewritten to their conditional forms; other instructions have
/// their existing predicate operands overwritten.
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    // Swap in the matching conditional branch opcode and append the
    // predicate operands the new form requires.
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    // Predicate operands come in (imm cc, reg CPSR) pairs; update both.
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}
482 | ||
483 | bool ARMBaseInstrInfo:: | |
484 | SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, | |
485 | const SmallVectorImpl<MachineOperand> &Pred2) const { | |
486 | if (Pred1.size() > 2 || Pred2.size() > 2) | |
487 | return false; | |
488 | ||
489 | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); | |
490 | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); | |
491 | if (CC1 == CC2) | |
492 | return true; | |
493 | ||
494 | switch (CC1) { | |
495 | default: | |
496 | return false; | |
497 | case ARMCC::AL: | |
498 | return true; | |
499 | case ARMCC::HS: | |
500 | return CC2 == ARMCC::HI; | |
501 | case ARMCC::LS: | |
502 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; | |
503 | case ARMCC::GE: | |
504 | return CC2 == ARMCC::GT; | |
505 | case ARMCC::LE: | |
506 | return CC2 == ARMCC::LT; | |
507 | } | |
508 | } | |
509 | ||
510 | bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, | |
511 | std::vector<MachineOperand> &Pred) const { | |
512 | bool Found = false; | |
513 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |
514 | const MachineOperand &MO = MI->getOperand(i); | |
515 | if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || | |
516 | (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { | |
517 | Pred.push_back(MO); | |
518 | Found = true; | |
519 | } | |
520 | } | |
521 | ||
522 | return Found; | |
523 | } | |
524 | ||
525 | /// isPredicable - Return true if the specified instruction can be predicated. | |
526 | /// By default, this returns true for every instruction with a | |
527 | /// PredicateOperand. | |
528 | bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { | |
529 | if (!MI->isPredicable()) | |
530 | return false; | |
531 | ||
532 | if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { | |
533 | ARMFunctionInfo *AFI = | |
534 | MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); | |
535 | return AFI->isThumb2Function(); | |
536 | } | |
537 | return true; | |
538 | } | |
539 | ||
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
/// The separate noinline declaration + definition is deliberate; do not fold
/// them together.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
// Returns the number of basic-block entries in jump table JTI.
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}
549 | ||
/// GetInstSize - Return the size of the specified MachineInstr.
/// Uses the MC descriptor size when available; otherwise handles inline asm,
/// labels, bundles, pseudo expansions, and inlined jump tables specially.
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  // The common case: the instruction descriptor records a fixed size.
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    // Pure markers: no machine code is emitted for these.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    // Expands to a movw/movt (or equivalent) pair.
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; TBH two byte each.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // 4 aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries.  The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) *2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
    return 0;
  }
}
640 | ||
641 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { | |
642 | unsigned Size = 0; | |
643 | MachineBasicBlock::const_instr_iterator I = MI; | |
644 | MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); | |
645 | while (++I != E && I->isInsideBundle()) { | |
646 | assert(!I->isBundle() && "No nested bundle!"); | |
647 | Size += GetInstSizeInBytes(&*I); | |
648 | } | |
649 | return Size; | |
650 | } | |
651 | ||
/// copyPhysReg - Emit instructions to copy SrcReg into DestReg before I.
/// Single-register copies are one MOVr/VMOV/VORR; register tuples (D-pairs,
/// Q-quads, GPR pairs, spaced D tuples) are split into per-subregister moves,
/// walked backward when source and destination overlap.
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  // Plain core-register copy.
  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  // Single-instruction FP / NEON copies.
  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    // VORR is a two-source instruction (dst = src | src).
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
  // Fall back to VMOVD.
  else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
  else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
  else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
  else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;

  // Spaced tuples: sub-registers are every other D register (Spacing = 2).
  else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
  else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
  else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
    unsigned Src = TRI->getSubReg(SrcReg,  BeginIdx + i*Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    // Verify no destination sub-register is read as a later source
    // (would make the copy destructive).
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    // NOTE(review): uses I->getDebugLoc() rather than the DL parameter —
    // presumably relies on I being a valid instruction here; confirm callers
    // never pass MBB.end().
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
      .addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}
749 | ||
750 | static const | |
751 | MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, | |
752 | unsigned Reg, unsigned SubIdx, unsigned State, | |
753 | const TargetRegisterInfo *TRI) { | |
754 | if (!SubIdx) | |
755 | return MIB.addReg(Reg, State); | |
756 | ||
757 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) | |
758 | return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); | |
759 | return MIB.addReg(Reg, State, SubIdx); | |
760 | } | |
761 | ||
762 | void ARMBaseInstrInfo:: | |
763 | storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | |
764 | unsigned SrcReg, bool isKill, int FI, | |
765 | const TargetRegisterClass *RC, | |
766 | const TargetRegisterInfo *TRI) const { | |
767 | DebugLoc DL; | |
768 | if (I != MBB.end()) DL = I->getDebugLoc(); | |
769 | MachineFunction &MF = *MBB.getParent(); | |
770 | MachineFrameInfo &MFI = *MF.getFrameInfo(); | |
771 | unsigned Align = MFI.getObjectAlignment(FI); | |
772 | ||
773 | MachineMemOperand *MMO = | |
774 | MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), | |
775 | MachineMemOperand::MOStore, | |
776 | MFI.getObjectSize(FI), | |
777 | Align); | |
778 | ||
779 | switch (RC->getSize()) { | |
780 | case 4: | |
781 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { | |
782 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) | |
783 | .addReg(SrcReg, getKillRegState(isKill)) | |
784 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
785 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { | |
786 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) | |
787 | .addReg(SrcReg, getKillRegState(isKill)) | |
788 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
789 | } else | |
790 | llvm_unreachable("Unknown reg class!"); | |
791 | break; | |
792 | case 8: | |
793 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { | |
794 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) | |
795 | .addReg(SrcReg, getKillRegState(isKill)) | |
796 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
970d7e83 LB |
797 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
798 | MachineInstrBuilder MIB = | |
799 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) | |
800 | .addFrameIndex(FI)) | |
801 | .addMemOperand(MMO); | |
802 | MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); | |
803 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); | |
223e47cc LB |
804 | } else |
805 | llvm_unreachable("Unknown reg class!"); | |
806 | break; | |
807 | case 16: | |
808 | if (ARM::DPairRegClass.hasSubClassEq(RC)) { | |
809 | // Use aligned spills if the stack can be realigned. | |
810 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
811 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) | |
812 | .addFrameIndex(FI).addImm(16) | |
813 | .addReg(SrcReg, getKillRegState(isKill)) | |
814 | .addMemOperand(MMO)); | |
815 | } else { | |
816 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) | |
817 | .addReg(SrcReg, getKillRegState(isKill)) | |
818 | .addFrameIndex(FI) | |
819 | .addMemOperand(MMO)); | |
820 | } | |
821 | } else | |
822 | llvm_unreachable("Unknown reg class!"); | |
823 | break; | |
824 | case 24: | |
825 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { | |
826 | // Use aligned spills if the stack can be realigned. | |
827 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
828 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) | |
829 | .addFrameIndex(FI).addImm(16) | |
830 | .addReg(SrcReg, getKillRegState(isKill)) | |
831 | .addMemOperand(MMO)); | |
832 | } else { | |
833 | MachineInstrBuilder MIB = | |
834 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) | |
835 | .addFrameIndex(FI)) | |
836 | .addMemOperand(MMO); | |
837 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); | |
838 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); | |
839 | AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); | |
840 | } | |
841 | } else | |
842 | llvm_unreachable("Unknown reg class!"); | |
843 | break; | |
844 | case 32: | |
845 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { | |
846 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
847 | // FIXME: It's possible to only store part of the QQ register if the | |
848 | // spilled def has a sub-register index. | |
849 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) | |
850 | .addFrameIndex(FI).addImm(16) | |
851 | .addReg(SrcReg, getKillRegState(isKill)) | |
852 | .addMemOperand(MMO)); | |
853 | } else { | |
854 | MachineInstrBuilder MIB = | |
855 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) | |
856 | .addFrameIndex(FI)) | |
857 | .addMemOperand(MMO); | |
858 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); | |
859 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); | |
860 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); | |
861 | AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); | |
862 | } | |
863 | } else | |
864 | llvm_unreachable("Unknown reg class!"); | |
865 | break; | |
866 | case 64: | |
867 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { | |
868 | MachineInstrBuilder MIB = | |
869 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) | |
870 | .addFrameIndex(FI)) | |
871 | .addMemOperand(MMO); | |
872 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); | |
873 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); | |
874 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); | |
875 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); | |
876 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); | |
877 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); | |
878 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); | |
879 | AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); | |
880 | } else | |
881 | llvm_unreachable("Unknown reg class!"); | |
882 | break; | |
883 | default: | |
884 | llvm_unreachable("Unknown reg class!"); | |
885 | } | |
886 | } | |
887 | ||
888 | unsigned | |
889 | ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, | |
890 | int &FrameIndex) const { | |
891 | switch (MI->getOpcode()) { | |
892 | default: break; | |
893 | case ARM::STRrs: | |
894 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. | |
895 | if (MI->getOperand(1).isFI() && | |
896 | MI->getOperand(2).isReg() && | |
897 | MI->getOperand(3).isImm() && | |
898 | MI->getOperand(2).getReg() == 0 && | |
899 | MI->getOperand(3).getImm() == 0) { | |
900 | FrameIndex = MI->getOperand(1).getIndex(); | |
901 | return MI->getOperand(0).getReg(); | |
902 | } | |
903 | break; | |
904 | case ARM::STRi12: | |
905 | case ARM::t2STRi12: | |
906 | case ARM::tSTRspi: | |
907 | case ARM::VSTRD: | |
908 | case ARM::VSTRS: | |
909 | if (MI->getOperand(1).isFI() && | |
910 | MI->getOperand(2).isImm() && | |
911 | MI->getOperand(2).getImm() == 0) { | |
912 | FrameIndex = MI->getOperand(1).getIndex(); | |
913 | return MI->getOperand(0).getReg(); | |
914 | } | |
915 | break; | |
916 | case ARM::VST1q64: | |
917 | case ARM::VST1d64TPseudo: | |
918 | case ARM::VST1d64QPseudo: | |
919 | if (MI->getOperand(0).isFI() && | |
920 | MI->getOperand(2).getSubReg() == 0) { | |
921 | FrameIndex = MI->getOperand(0).getIndex(); | |
922 | return MI->getOperand(2).getReg(); | |
923 | } | |
924 | break; | |
925 | case ARM::VSTMQIA: | |
926 | if (MI->getOperand(1).isFI() && | |
927 | MI->getOperand(0).getSubReg() == 0) { | |
928 | FrameIndex = MI->getOperand(1).getIndex(); | |
929 | return MI->getOperand(0).getReg(); | |
930 | } | |
931 | break; | |
932 | } | |
933 | ||
934 | return 0; | |
935 | } | |
936 | ||
937 | unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, | |
938 | int &FrameIndex) const { | |
939 | const MachineMemOperand *Dummy; | |
940 | return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); | |
941 | } | |
942 | ||
943 | void ARMBaseInstrInfo:: | |
944 | loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | |
945 | unsigned DestReg, int FI, | |
946 | const TargetRegisterClass *RC, | |
947 | const TargetRegisterInfo *TRI) const { | |
948 | DebugLoc DL; | |
949 | if (I != MBB.end()) DL = I->getDebugLoc(); | |
950 | MachineFunction &MF = *MBB.getParent(); | |
970d7e83 | 951 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
223e47cc LB |
952 | MachineFrameInfo &MFI = *MF.getFrameInfo(); |
953 | unsigned Align = MFI.getObjectAlignment(FI); | |
954 | MachineMemOperand *MMO = | |
955 | MF.getMachineMemOperand( | |
956 | MachinePointerInfo::getFixedStack(FI), | |
957 | MachineMemOperand::MOLoad, | |
958 | MFI.getObjectSize(FI), | |
959 | Align); | |
960 | ||
961 | switch (RC->getSize()) { | |
962 | case 4: | |
963 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { | |
964 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) | |
965 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
966 | ||
967 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { | |
968 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) | |
969 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
970 | } else | |
971 | llvm_unreachable("Unknown reg class!"); | |
972 | break; | |
973 | case 8: | |
974 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { | |
975 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) | |
976 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
970d7e83 LB |
977 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
978 | unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; | |
979 | MachineInstrBuilder MIB = | |
980 | AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) | |
981 | .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); | |
982 | MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); | |
983 | MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); | |
984 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) | |
985 | MIB.addReg(DestReg, RegState::ImplicitDefine); | |
223e47cc LB |
986 | } else |
987 | llvm_unreachable("Unknown reg class!"); | |
988 | break; | |
989 | case 16: | |
990 | if (ARM::DPairRegClass.hasSubClassEq(RC)) { | |
991 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
992 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) | |
993 | .addFrameIndex(FI).addImm(16) | |
994 | .addMemOperand(MMO)); | |
995 | } else { | |
996 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) | |
997 | .addFrameIndex(FI) | |
998 | .addMemOperand(MMO)); | |
999 | } | |
1000 | } else | |
1001 | llvm_unreachable("Unknown reg class!"); | |
1002 | break; | |
1003 | case 24: | |
1004 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { | |
1005 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
1006 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) | |
1007 | .addFrameIndex(FI).addImm(16) | |
1008 | .addMemOperand(MMO)); | |
1009 | } else { | |
1010 | MachineInstrBuilder MIB = | |
1011 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) | |
1012 | .addFrameIndex(FI) | |
1013 | .addMemOperand(MMO)); | |
1014 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); | |
1015 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); | |
1016 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); | |
1017 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) | |
1018 | MIB.addReg(DestReg, RegState::ImplicitDefine); | |
1019 | } | |
1020 | } else | |
1021 | llvm_unreachable("Unknown reg class!"); | |
1022 | break; | |
1023 | case 32: | |
1024 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { | |
1025 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { | |
1026 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) | |
1027 | .addFrameIndex(FI).addImm(16) | |
1028 | .addMemOperand(MMO)); | |
1029 | } else { | |
1030 | MachineInstrBuilder MIB = | |
1031 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) | |
1032 | .addFrameIndex(FI)) | |
1033 | .addMemOperand(MMO); | |
1034 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); | |
1035 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); | |
1036 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); | |
1037 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); | |
1038 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) | |
1039 | MIB.addReg(DestReg, RegState::ImplicitDefine); | |
1040 | } | |
1041 | } else | |
1042 | llvm_unreachable("Unknown reg class!"); | |
1043 | break; | |
1044 | case 64: | |
1045 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { | |
1046 | MachineInstrBuilder MIB = | |
1047 | AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) | |
1048 | .addFrameIndex(FI)) | |
1049 | .addMemOperand(MMO); | |
1050 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); | |
1051 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); | |
1052 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); | |
1053 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); | |
1054 | MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); | |
1055 | MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); | |
1056 | MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); | |
1057 | MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); | |
1058 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) | |
1059 | MIB.addReg(DestReg, RegState::ImplicitDefine); | |
1060 | } else | |
1061 | llvm_unreachable("Unknown reg class!"); | |
1062 | break; | |
1063 | default: | |
1064 | llvm_unreachable("Unknown regclass!"); | |
1065 | } | |
1066 | } | |
1067 | ||
1068 | unsigned | |
1069 | ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, | |
1070 | int &FrameIndex) const { | |
1071 | switch (MI->getOpcode()) { | |
1072 | default: break; | |
1073 | case ARM::LDRrs: | |
1074 | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. | |
1075 | if (MI->getOperand(1).isFI() && | |
1076 | MI->getOperand(2).isReg() && | |
1077 | MI->getOperand(3).isImm() && | |
1078 | MI->getOperand(2).getReg() == 0 && | |
1079 | MI->getOperand(3).getImm() == 0) { | |
1080 | FrameIndex = MI->getOperand(1).getIndex(); | |
1081 | return MI->getOperand(0).getReg(); | |
1082 | } | |
1083 | break; | |
1084 | case ARM::LDRi12: | |
1085 | case ARM::t2LDRi12: | |
1086 | case ARM::tLDRspi: | |
1087 | case ARM::VLDRD: | |
1088 | case ARM::VLDRS: | |
1089 | if (MI->getOperand(1).isFI() && | |
1090 | MI->getOperand(2).isImm() && | |
1091 | MI->getOperand(2).getImm() == 0) { | |
1092 | FrameIndex = MI->getOperand(1).getIndex(); | |
1093 | return MI->getOperand(0).getReg(); | |
1094 | } | |
1095 | break; | |
1096 | case ARM::VLD1q64: | |
1097 | case ARM::VLD1d64TPseudo: | |
1098 | case ARM::VLD1d64QPseudo: | |
1099 | if (MI->getOperand(1).isFI() && | |
1100 | MI->getOperand(0).getSubReg() == 0) { | |
1101 | FrameIndex = MI->getOperand(1).getIndex(); | |
1102 | return MI->getOperand(0).getReg(); | |
1103 | } | |
1104 | break; | |
1105 | case ARM::VLDMQIA: | |
1106 | if (MI->getOperand(1).isFI() && | |
1107 | MI->getOperand(0).getSubReg() == 0) { | |
1108 | FrameIndex = MI->getOperand(1).getIndex(); | |
1109 | return MI->getOperand(0).getReg(); | |
1110 | } | |
1111 | break; | |
1112 | } | |
1113 | ||
1114 | return 0; | |
1115 | } | |
1116 | ||
1117 | unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, | |
1118 | int &FrameIndex) const { | |
1119 | const MachineMemOperand *Dummy; | |
1120 | return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); | |
1121 | } | |
1122 | ||
1123 | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ | |
1124 | // This hook gets to expand COPY instructions before they become | |
1125 | // copyPhysReg() calls. Look for VMOVS instructions that can legally be | |
1126 | // widened to VMOVD. We prefer the VMOVD when possible because it may be | |
1127 | // changed into a VORR that can go down the NEON pipeline. | |
970d7e83 | 1128 | if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) |
223e47cc LB |
1129 | return false; |
1130 | ||
1131 | // Look for a copy between even S-registers. That is where we keep floats | |
1132 | // when using NEON v2f32 instructions for f32 arithmetic. | |
1133 | unsigned DstRegS = MI->getOperand(0).getReg(); | |
1134 | unsigned SrcRegS = MI->getOperand(1).getReg(); | |
1135 | if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) | |
1136 | return false; | |
1137 | ||
1138 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |
1139 | unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, | |
1140 | &ARM::DPRRegClass); | |
1141 | unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, | |
1142 | &ARM::DPRRegClass); | |
1143 | if (!DstRegD || !SrcRegD) | |
1144 | return false; | |
1145 | ||
1146 | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only | |
1147 | // legal if the COPY already defines the full DstRegD, and it isn't a | |
1148 | // sub-register insertion. | |
1149 | if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) | |
1150 | return false; | |
1151 | ||
1152 | // A dead copy shouldn't show up here, but reject it just in case. | |
1153 | if (MI->getOperand(0).isDead()) | |
1154 | return false; | |
1155 | ||
1156 | // All clear, widen the COPY. | |
1157 | DEBUG(dbgs() << "widening: " << *MI); | |
970d7e83 | 1158 | MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); |
223e47cc LB |
1159 | |
1160 | // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg | |
1161 | // or some other super-register. | |
1162 | int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); | |
1163 | if (ImpDefIdx != -1) | |
1164 | MI->RemoveOperand(ImpDefIdx); | |
1165 | ||
1166 | // Change the opcode and operands. | |
1167 | MI->setDesc(get(ARM::VMOVD)); | |
1168 | MI->getOperand(0).setReg(DstRegD); | |
1169 | MI->getOperand(1).setReg(SrcRegD); | |
970d7e83 | 1170 | AddDefaultPred(MIB); |
223e47cc LB |
1171 | |
1172 | // We are now reading SrcRegD instead of SrcRegS. This may upset the | |
1173 | // register scavenger and machine verifier, so we need to indicate that we | |
1174 | // are reading an undefined value from SrcRegD, but a proper value from | |
1175 | // SrcRegS. | |
1176 | MI->getOperand(1).setIsUndef(); | |
970d7e83 | 1177 | MIB.addReg(SrcRegS, RegState::Implicit); |
223e47cc LB |
1178 | |
1179 | // SrcRegD may actually contain an unrelated value in the ssub_1 | |
1180 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. | |
1181 | if (MI->getOperand(1).isKill()) { | |
1182 | MI->getOperand(1).setIsKill(false); | |
1183 | MI->addRegisterKilled(SrcRegS, TRI, true); | |
1184 | } | |
1185 | ||
1186 | DEBUG(dbgs() << "replaced by: " << *MI); | |
1187 | return true; | |
1188 | } | |
1189 | ||
1190 | MachineInstr* | |
1191 | ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, | |
1192 | int FrameIx, uint64_t Offset, | |
1193 | const MDNode *MDPtr, | |
1194 | DebugLoc DL) const { | |
1195 | MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) | |
1196 | .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); | |
1197 | return &*MIB; | |
1198 | } | |
1199 | ||
1200 | /// Create a copy of a const pool value. Update CPI to the new index and return | |
1201 | /// the label UID. | |
1202 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { | |
1203 | MachineConstantPool *MCP = MF.getConstantPool(); | |
1204 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |
1205 | ||
1206 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; | |
1207 | assert(MCPE.isMachineConstantPoolEntry() && | |
1208 | "Expecting a machine constantpool entry!"); | |
1209 | ARMConstantPoolValue *ACPV = | |
1210 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); | |
1211 | ||
1212 | unsigned PCLabelId = AFI->createPICLabelUId(); | |
1213 | ARMConstantPoolValue *NewCPV = 0; | |
1214 | // FIXME: The below assumes PIC relocation model and that the function | |
1215 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and | |
1216 | // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR | |
1217 | // instructions, so that's probably OK, but is PIC always correct when | |
1218 | // we get here? | |
1219 | if (ACPV->isGlobalValue()) | |
1220 | NewCPV = ARMConstantPoolConstant:: | |
1221 | Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, | |
1222 | ARMCP::CPValue, 4); | |
1223 | else if (ACPV->isExtSymbol()) | |
1224 | NewCPV = ARMConstantPoolSymbol:: | |
1225 | Create(MF.getFunction()->getContext(), | |
1226 | cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); | |
1227 | else if (ACPV->isBlockAddress()) | |
1228 | NewCPV = ARMConstantPoolConstant:: | |
1229 | Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, | |
1230 | ARMCP::CPBlockAddress, 4); | |
1231 | else if (ACPV->isLSDA()) | |
1232 | NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, | |
1233 | ARMCP::CPLSDA, 4); | |
1234 | else if (ACPV->isMachineBasicBlock()) | |
1235 | NewCPV = ARMConstantPoolMBB:: | |
1236 | Create(MF.getFunction()->getContext(), | |
1237 | cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); | |
1238 | else | |
1239 | llvm_unreachable("Unexpected ARM constantpool value type!!"); | |
1240 | CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); | |
1241 | return PCLabelId; | |
1242 | } | |
1243 | ||
1244 | void ARMBaseInstrInfo:: | |
1245 | reMaterialize(MachineBasicBlock &MBB, | |
1246 | MachineBasicBlock::iterator I, | |
1247 | unsigned DestReg, unsigned SubIdx, | |
1248 | const MachineInstr *Orig, | |
1249 | const TargetRegisterInfo &TRI) const { | |
1250 | unsigned Opcode = Orig->getOpcode(); | |
1251 | switch (Opcode) { | |
1252 | default: { | |
1253 | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); | |
1254 | MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); | |
1255 | MBB.insert(I, MI); | |
1256 | break; | |
1257 | } | |
1258 | case ARM::tLDRpci_pic: | |
1259 | case ARM::t2LDRpci_pic: { | |
1260 | MachineFunction &MF = *MBB.getParent(); | |
1261 | unsigned CPI = Orig->getOperand(1).getIndex(); | |
1262 | unsigned PCLabelId = duplicateCPV(MF, CPI); | |
1263 | MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), | |
1264 | DestReg) | |
1265 | .addConstantPoolIndex(CPI).addImm(PCLabelId); | |
1266 | MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); | |
1267 | break; | |
1268 | } | |
1269 | } | |
1270 | } | |
1271 | ||
1272 | MachineInstr * | |
1273 | ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { | |
970d7e83 | 1274 | MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); |
223e47cc LB |
1275 | switch(Orig->getOpcode()) { |
1276 | case ARM::tLDRpci_pic: | |
1277 | case ARM::t2LDRpci_pic: { | |
1278 | unsigned CPI = Orig->getOperand(1).getIndex(); | |
1279 | unsigned PCLabelId = duplicateCPV(MF, CPI); | |
1280 | Orig->getOperand(1).setIndex(CPI); | |
1281 | Orig->getOperand(2).setImm(PCLabelId); | |
1282 | break; | |
1283 | } | |
1284 | } | |
1285 | return MI; | |
1286 | } | |
1287 | ||
1288 | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, | |
1289 | const MachineInstr *MI1, | |
1290 | const MachineRegisterInfo *MRI) const { | |
1291 | int Opcode = MI0->getOpcode(); | |
1292 | if (Opcode == ARM::t2LDRpci || | |
1293 | Opcode == ARM::t2LDRpci_pic || | |
1294 | Opcode == ARM::tLDRpci || | |
1295 | Opcode == ARM::tLDRpci_pic || | |
1296 | Opcode == ARM::MOV_ga_dyn || | |
1297 | Opcode == ARM::MOV_ga_pcrel || | |
1298 | Opcode == ARM::MOV_ga_pcrel_ldr || | |
1299 | Opcode == ARM::t2MOV_ga_dyn || | |
1300 | Opcode == ARM::t2MOV_ga_pcrel) { | |
1301 | if (MI1->getOpcode() != Opcode) | |
1302 | return false; | |
1303 | if (MI0->getNumOperands() != MI1->getNumOperands()) | |
1304 | return false; | |
1305 | ||
1306 | const MachineOperand &MO0 = MI0->getOperand(1); | |
1307 | const MachineOperand &MO1 = MI1->getOperand(1); | |
1308 | if (MO0.getOffset() != MO1.getOffset()) | |
1309 | return false; | |
1310 | ||
1311 | if (Opcode == ARM::MOV_ga_dyn || | |
1312 | Opcode == ARM::MOV_ga_pcrel || | |
1313 | Opcode == ARM::MOV_ga_pcrel_ldr || | |
1314 | Opcode == ARM::t2MOV_ga_dyn || | |
1315 | Opcode == ARM::t2MOV_ga_pcrel) | |
1316 | // Ignore the PC labels. | |
1317 | return MO0.getGlobal() == MO1.getGlobal(); | |
1318 | ||
1319 | const MachineFunction *MF = MI0->getParent()->getParent(); | |
1320 | const MachineConstantPool *MCP = MF->getConstantPool(); | |
1321 | int CPI0 = MO0.getIndex(); | |
1322 | int CPI1 = MO1.getIndex(); | |
1323 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; | |
1324 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; | |
1325 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); | |
1326 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); | |
1327 | if (isARMCP0 && isARMCP1) { | |
1328 | ARMConstantPoolValue *ACPV0 = | |
1329 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); | |
1330 | ARMConstantPoolValue *ACPV1 = | |
1331 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); | |
1332 | return ACPV0->hasSameValue(ACPV1); | |
1333 | } else if (!isARMCP0 && !isARMCP1) { | |
1334 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; | |
1335 | } | |
1336 | return false; | |
1337 | } else if (Opcode == ARM::PICLDR) { | |
1338 | if (MI1->getOpcode() != Opcode) | |
1339 | return false; | |
1340 | if (MI0->getNumOperands() != MI1->getNumOperands()) | |
1341 | return false; | |
1342 | ||
1343 | unsigned Addr0 = MI0->getOperand(1).getReg(); | |
1344 | unsigned Addr1 = MI1->getOperand(1).getReg(); | |
1345 | if (Addr0 != Addr1) { | |
1346 | if (!MRI || | |
1347 | !TargetRegisterInfo::isVirtualRegister(Addr0) || | |
1348 | !TargetRegisterInfo::isVirtualRegister(Addr1)) | |
1349 | return false; | |
1350 | ||
1351 | // This assumes SSA form. | |
1352 | MachineInstr *Def0 = MRI->getVRegDef(Addr0); | |
1353 | MachineInstr *Def1 = MRI->getVRegDef(Addr1); | |
1354 | // Check if the loaded value, e.g. a constantpool of a global address, are | |
1355 | // the same. | |
1356 | if (!produceSameValue(Def0, Def1, MRI)) | |
1357 | return false; | |
1358 | } | |
1359 | ||
1360 | for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { | |
1361 | // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg | |
1362 | const MachineOperand &MO0 = MI0->getOperand(i); | |
1363 | const MachineOperand &MO1 = MI1->getOperand(i); | |
1364 | if (!MO0.isIdenticalTo(MO1)) | |
1365 | return false; | |
1366 | } | |
1367 | return true; | |
1368 | } | |
1369 | ||
1370 | return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); | |
1371 | } | |
1372 | ||
1373 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to | |
1374 | /// determine if two loads are loading from the same base address. It should | |
1375 | /// only return true if the base pointers are the same and the only differences | |
1376 | /// between the two addresses is the offset. It also returns the offsets by | |
1377 | /// reference. | |
970d7e83 LB |
1378 | /// |
1379 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched | |
1380 | /// is permanently disabled. | |
223e47cc LB |
1381 | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1382 | int64_t &Offset1, | |
1383 | int64_t &Offset2) const { | |
1384 | // Don't worry about Thumb: just ARM and Thumb2. | |
1385 | if (Subtarget.isThumb1Only()) return false; | |
1386 | ||
1387 | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) | |
1388 | return false; | |
1389 | ||
1390 | switch (Load1->getMachineOpcode()) { | |
1391 | default: | |
1392 | return false; | |
1393 | case ARM::LDRi12: | |
1394 | case ARM::LDRBi12: | |
1395 | case ARM::LDRD: | |
1396 | case ARM::LDRH: | |
1397 | case ARM::LDRSB: | |
1398 | case ARM::LDRSH: | |
1399 | case ARM::VLDRD: | |
1400 | case ARM::VLDRS: | |
1401 | case ARM::t2LDRi8: | |
1402 | case ARM::t2LDRDi8: | |
1403 | case ARM::t2LDRSHi8: | |
1404 | case ARM::t2LDRi12: | |
1405 | case ARM::t2LDRSHi12: | |
1406 | break; | |
1407 | } | |
1408 | ||
1409 | switch (Load2->getMachineOpcode()) { | |
1410 | default: | |
1411 | return false; | |
1412 | case ARM::LDRi12: | |
1413 | case ARM::LDRBi12: | |
1414 | case ARM::LDRD: | |
1415 | case ARM::LDRH: | |
1416 | case ARM::LDRSB: | |
1417 | case ARM::LDRSH: | |
1418 | case ARM::VLDRD: | |
1419 | case ARM::VLDRS: | |
1420 | case ARM::t2LDRi8: | |
1421 | case ARM::t2LDRSHi8: | |
1422 | case ARM::t2LDRi12: | |
1423 | case ARM::t2LDRSHi12: | |
1424 | break; | |
1425 | } | |
1426 | ||
1427 | // Check if base addresses and chain operands match. | |
1428 | if (Load1->getOperand(0) != Load2->getOperand(0) || | |
1429 | Load1->getOperand(4) != Load2->getOperand(4)) | |
1430 | return false; | |
1431 | ||
1432 | // Index should be Reg0. | |
1433 | if (Load1->getOperand(3) != Load2->getOperand(3)) | |
1434 | return false; | |
1435 | ||
1436 | // Determine the offsets. | |
1437 | if (isa<ConstantSDNode>(Load1->getOperand(1)) && | |
1438 | isa<ConstantSDNode>(Load2->getOperand(1))) { | |
1439 | Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); | |
1440 | Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); | |
1441 | return true; | |
1442 | } | |
1443 | ||
1444 | return false; | |
1445 | } | |
1446 | ||
1447 | /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to | |
1448 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should | |
1449 | /// be scheduled togther. On some targets if two loads are loading from | |
1450 | /// addresses in the same cache line, it's better if they are scheduled | |
1451 | /// together. This function takes two integers that represent the load offsets | |
1452 | /// from the common base address. It returns true if it decides it's desirable | |
1453 | /// to schedule the two loads together. "NumLoads" is the number of loads that | |
1454 | /// have already been scheduled after Load1. | |
970d7e83 LB |
1455 | /// |
1456 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched | |
1457 | /// is permanently disabled. | |
223e47cc LB |
1458 | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
1459 | int64_t Offset1, int64_t Offset2, | |
1460 | unsigned NumLoads) const { | |
1461 | // Don't worry about Thumb: just ARM and Thumb2. | |
1462 | if (Subtarget.isThumb1Only()) return false; | |
1463 | ||
1464 | assert(Offset2 > Offset1); | |
1465 | ||
1466 | if ((Offset2 - Offset1) / 8 > 64) | |
1467 | return false; | |
1468 | ||
1469 | if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) | |
1470 | return false; // FIXME: overly conservative? | |
1471 | ||
1472 | // Four loads in a row should be sufficient. | |
1473 | if (NumLoads >= 3) | |
1474 | return false; | |
1475 | ||
1476 | return true; | |
1477 | } | |
1478 | ||
/// isSchedulingBoundary - Returns true if MI is a point the scheduler must not
/// move instructions across. Here: debug values never are; terminators and
/// labels always are; the instruction immediately preceding a t2IT is one (so
/// an IT block stays glued to everything after the IT); and any non-call
/// instruction that defines SP is one.
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI->isTerminator() || MI->isLabel())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions
  while (++I != MBB->end() && I->isDebugValue())
    ;
  // If the next real instruction is a t2IT, MI is the boundary.
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  // Calls don't actually change the stack pointer, even if they have imp-defs.
  // No ARM calling conventions change the stack pointer. (X86 calling
  // conventions sometimes do).
  if (!MI->isCall() && MI->definesRegister(ARM::SP))
    return true;

  return false;
}
1521 | ||
1522 | bool ARMBaseInstrInfo:: | |
1523 | isProfitableToIfCvt(MachineBasicBlock &MBB, | |
1524 | unsigned NumCycles, unsigned ExtraPredCycles, | |
1525 | const BranchProbability &Probability) const { | |
1526 | if (!NumCycles) | |
1527 | return false; | |
1528 | ||
1529 | // Attempt to estimate the relative costs of predication versus branching. | |
1530 | unsigned UnpredCost = Probability.getNumerator() * NumCycles; | |
1531 | UnpredCost /= Probability.getDenominator(); | |
1532 | UnpredCost += 1; // The branch itself | |
1533 | UnpredCost += Subtarget.getMispredictionPenalty() / 10; | |
1534 | ||
1535 | return (NumCycles + ExtraPredCycles) <= UnpredCost; | |
1536 | } | |
1537 | ||
1538 | bool ARMBaseInstrInfo:: | |
1539 | isProfitableToIfCvt(MachineBasicBlock &TMBB, | |
1540 | unsigned TCycles, unsigned TExtra, | |
1541 | MachineBasicBlock &FMBB, | |
1542 | unsigned FCycles, unsigned FExtra, | |
1543 | const BranchProbability &Probability) const { | |
1544 | if (!TCycles || !FCycles) | |
1545 | return false; | |
1546 | ||
1547 | // Attempt to estimate the relative costs of predication versus branching. | |
1548 | unsigned TUnpredCost = Probability.getNumerator() * TCycles; | |
1549 | TUnpredCost /= Probability.getDenominator(); | |
1550 | ||
1551 | uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); | |
1552 | unsigned FUnpredCost = Comp * FCycles; | |
1553 | FUnpredCost /= Probability.getDenominator(); | |
1554 | ||
1555 | unsigned UnpredCost = TUnpredCost + FUnpredCost; | |
1556 | UnpredCost += 1; // The branch itself | |
1557 | UnpredCost += Subtarget.getMispredictionPenalty() / 10; | |
1558 | ||
1559 | return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; | |
1560 | } | |
1561 | ||
1562 | bool | |
1563 | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, | |
1564 | MachineBasicBlock &FMBB) const { | |
1565 | // Reduce false anti-dependencies to let Swift's out-of-order execution | |
1566 | // engine do its thing. | |
1567 | return Subtarget.isSwift(); | |
1568 | } | |
1569 | ||
1570 | /// getInstrPredicate - If instruction is predicated, returns its predicate | |
1571 | /// condition, otherwise returns AL. It also returns the condition code | |
1572 | /// register by reference. | |
1573 | ARMCC::CondCodes | |
1574 | llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { | |
1575 | int PIdx = MI->findFirstPredOperandIdx(); | |
1576 | if (PIdx == -1) { | |
1577 | PredReg = 0; | |
1578 | return ARMCC::AL; | |
1579 | } | |
1580 | ||
1581 | PredReg = MI->getOperand(PIdx+1).getReg(); | |
1582 | return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); | |
1583 | } | |
1584 | ||
1585 | ||
1586 | int llvm::getMatchingCondBranchOpcode(int Opc) { | |
1587 | if (Opc == ARM::B) | |
1588 | return ARM::Bcc; | |
1589 | if (Opc == ARM::tB) | |
1590 | return ARM::tBcc; | |
1591 | if (Opc == ARM::t2B) | |
1592 | return ARM::t2Bcc; | |
1593 | ||
1594 | llvm_unreachable("Unknown unconditional branch opcode!"); | |
1595 | } | |
1596 | ||
/// commuteInstruction - Handle commutable instructions.
/// MOVCC is the special case: swapping its true/false value operands is only
/// correct if the predicate condition is inverted at the same time, so that
/// inversion is performed here after the generic swap. Everything else is
/// delegated to TargetInstrInfo.
MachineInstr *
ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case ARM::MOVCCr:
  case ARM::t2MOVCCr: {
    // MOVCC can be commuted by inverting the condition.
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    // MOVCC AL can't be inverted. Shouldn't happen.
    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
      return NULL;
    // Let the generic implementation swap the register operands first; it may
    // return a new instruction (or NULL on failure) depending on NewMI.
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
    if (!MI)
      return NULL;
    // After swapping the MOVCC operands, also invert the condition.
    MI->getOperand(MI->findFirstPredOperandIdx())
      .setImm(ARMCC::getOppositeCondition(CC));
    return MI;
  }
  }
  return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
1620 | ||
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
/// Requirements checked here: Reg is a virtual register with exactly one
/// non-debug use, its def is predicable, has no frame-index/CP/jump-table
/// operands, no tied operands, no physreg operands, no live defs besides Reg,
/// and is safe to move across stores. Returns 0 if any condition fails.
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
                                      const MachineRegisterInfo &MRI,
                                      const TargetInstrInfo *TII) {
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return 0;
  // Folding duplicates the def into the MOVCC's position, so the value must
  // have no other consumers.
  if (!MRI.hasOneNonDBGUse(Reg))
    return 0;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return 0;
  // MI is folded into the MOVCC by predicating it.
  if (!MI->isPredicable())
    return 0;
  // Check if MI has any non-dead defs or physreg uses. This also detects
  // predicated instructions which will be reading CPSR.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return 0;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return 0;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      return 0;
    if (MO.isDef() && !MO.isDead())
      return 0;
  }
  // The def will execute at the MOVCC's location, so it must not be reordered
  // past any store.
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
    return 0;
  return MI;
}
1658 | ||
1659 | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, | |
1660 | SmallVectorImpl<MachineOperand> &Cond, | |
1661 | unsigned &TrueOp, unsigned &FalseOp, | |
1662 | bool &Optimizable) const { | |
1663 | assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && | |
1664 | "Unknown select instruction"); | |
1665 | // MOVCC operands: | |
1666 | // 0: Def. | |
1667 | // 1: True use. | |
1668 | // 2: False use. | |
1669 | // 3: Condition code. | |
1670 | // 4: CPSR use. | |
1671 | TrueOp = 1; | |
1672 | FalseOp = 2; | |
1673 | Cond.push_back(MI->getOperand(3)); | |
1674 | Cond.push_back(MI->getOperand(4)); | |
1675 | // We can always fold a def. | |
1676 | Optimizable = true; | |
1677 | return false; | |
1678 | } | |
1679 | ||
/// optimizeSelect - Replace a MOVCC with a predicated copy of the instruction
/// defining one of its value operands. Prefers folding the false-value def
/// (operand 2); falls back to the true-value def (operand 1), which requires
/// inverting the condition. The not-taken value is re-attached as an implicit
/// use tied to the def so the register allocator assigns it the same register
/// as the result. Returns the new instruction, or 0 if nothing can be folded.
/// NOTE(review): the PreferFalse parameter is not consulted in this body —
/// the false side is always tried first; confirm against the interface docs.
MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
                                               bool PreferFalse) const {
  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
  // Folding the true-value def means the result is produced when the
  // condition is FALSE, so the condition must be inverted below.
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return 0;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                      DefMI->getDesc(),
                                      MI->getOperand(0).getReg());

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.addOperand(DefMI->getOperand(i));

  // Re-predicate with the MOVCC's condition (inverted if we folded the
  // true-value def) and its CPSR use.
  unsigned CondCode = MI->getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.addOperand(MI->getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    AddDefaultCC(NewMI);

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
  FalseReg.setImplicit();
  NewMI.addOperand(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}
1728 | ||
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;  // Flag-setting pseudo opcode.
  uint16_t MachineOpc; // Real opcode it maps onto.
};

static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  // ARM-mode ADD forms.
  {ARM::ADDSri, ARM::ADDri},
  {ARM::ADDSrr, ARM::ADDrr},
  {ARM::ADDSrsi, ARM::ADDrsi},
  {ARM::ADDSrsr, ARM::ADDrsr},

  // ARM-mode SUB forms.
  {ARM::SUBSri, ARM::SUBri},
  {ARM::SUBSrr, ARM::SUBrr},
  {ARM::SUBSrsi, ARM::SUBrsi},
  {ARM::SUBSrsr, ARM::SUBrsr},

  // ARM-mode RSB forms.
  {ARM::RSBSri, ARM::RSBri},
  {ARM::RSBSrsi, ARM::RSBrsi},
  {ARM::RSBSrsr, ARM::RSBrsr},

  // Thumb2 ADD forms.
  {ARM::t2ADDSri, ARM::t2ADDri},
  {ARM::t2ADDSrr, ARM::t2ADDrr},
  {ARM::t2ADDSrs, ARM::t2ADDrs},

  // Thumb2 SUB forms.
  {ARM::t2SUBSri, ARM::t2SUBri},
  {ARM::t2SUBSrr, ARM::t2SUBrr},
  {ARM::t2SUBSrs, ARM::t2SUBrs},

  // Thumb2 RSB forms.
  {ARM::t2RSBSri, ARM::t2RSBri},
  {ARM::t2RSBSrs, ARM::t2RSBrs},
};
1766 | ||
1767 | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { | |
1768 | for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) | |
1769 | if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) | |
1770 | return AddSubFlagsOpcodeMap[i].MachineOpc; | |
1771 | return 0; | |
1772 | } | |
1773 | ||
/// emitARMRegPlusImmediate - Emit a sequence of ADDri/SUBri instructions that
/// computes DestReg = BaseReg + NumBytes (NumBytes may be negative). The
/// magnitude is peeled into so_imm-encodable chunks (a rotated 8-bit field),
/// one instruction per chunk, each predicated by Pred/PredReg and tagged with
/// MIFlags. After the first instruction, DestReg is used as the running base.
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                               unsigned DestReg, unsigned BaseReg, int NumBytes,
                               ARMCC::CondCodes Pred, unsigned PredReg,
                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  // Work with the magnitude; remember the sign to pick ADD vs SUB.
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    // Extract the largest chunk of NumBytes encodable as a rotated imm8.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
      .setMIFlags(MIFlags);
    // Chain subsequent chunks off the partially-computed result.
    BaseReg = DestReg;
  }
}
1801 | ||
/// rewriteARMFrameIndex - Replace the frame-index operand at FrameRegIdx of MI
/// with FrameReg, folding as much of Offset into the instruction's immediate
/// field as its addressing mode allows. On return, Offset holds the residual
/// that could not be encoded; returns true iff the whole offset was absorbed
/// (residual == 0). ADDri may additionally degenerate into a plain MOVr or
/// flip to SUBri depending on the combined offset's sign.
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    // Fold the instruction's existing immediate into the offset first.
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    // Memory-style instruction: decode the addressing mode to find where the
    // immediate lives, its sign convention, its bit width, and its scale.
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4; // AddrMode5 immediates are in words.
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
    }

    // Combine with the instruction's existing (signed) offset.
    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold address comp. if opcode has offset bits
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits; // Sub encoded as an extra bit.
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  // Report the (signed) residual; success means everything was folded.
  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}
1941 | ||
1942 | /// analyzeCompare - For a comparison instruction, return the source registers | |
1943 | /// in SrcReg and SrcReg2 if having two register operands, and the value it | |
1944 | /// compares against in CmpValue. Return true if the comparison instruction | |
1945 | /// can be analyzed. | |
1946 | bool ARMBaseInstrInfo:: | |
1947 | analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, | |
1948 | int &CmpMask, int &CmpValue) const { | |
1949 | switch (MI->getOpcode()) { | |
1950 | default: break; | |
1951 | case ARM::CMPri: | |
1952 | case ARM::t2CMPri: | |
1953 | SrcReg = MI->getOperand(0).getReg(); | |
1954 | SrcReg2 = 0; | |
1955 | CmpMask = ~0; | |
1956 | CmpValue = MI->getOperand(1).getImm(); | |
1957 | return true; | |
1958 | case ARM::CMPrr: | |
1959 | case ARM::t2CMPrr: | |
1960 | SrcReg = MI->getOperand(0).getReg(); | |
1961 | SrcReg2 = MI->getOperand(1).getReg(); | |
1962 | CmpMask = ~0; | |
1963 | CmpValue = 0; | |
1964 | return true; | |
1965 | case ARM::TSTri: | |
1966 | case ARM::t2TSTri: | |
1967 | SrcReg = MI->getOperand(0).getReg(); | |
1968 | SrcReg2 = 0; | |
1969 | CmpMask = MI->getOperand(1).getImm(); | |
1970 | CmpValue = 0; | |
1971 | return true; | |
1972 | } | |
1973 | ||
1974 | return false; | |
1975 | } | |
1976 | ||
/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
/// Note MI is an in/out parameter: on a COPY it is advanced to the next
/// instruction in the block before recursing.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    // The AND must apply exactly the TST's mask ...
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    // ... and read (CommonUse) or define (!CommonUse) the compared register.
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  case ARM::COPY: {
    // Walk down one instruction which is potentially an 'and'.
    const MachineInstr &Copy = *MI;
    MachineBasicBlock::iterator AND(
      llvm::next(MachineBasicBlock::iterator(MI)));
    if (AND == MI->getParent()->end()) return false;
    MI = AND;
    // Recurse with the copy's destination as the register the AND should use.
    return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                             CmpMask, true);
  }
  }

  return false;
}
2005 | ||
2006 | /// getSwappedCondition - assume the flags are set by MI(a,b), return | |
2007 | /// the condition code if we modify the instructions such that flags are | |
2008 | /// set by MI(b,a). | |
2009 | inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { | |
2010 | switch (CC) { | |
2011 | default: return ARMCC::AL; | |
2012 | case ARMCC::EQ: return ARMCC::EQ; | |
2013 | case ARMCC::NE: return ARMCC::NE; | |
2014 | case ARMCC::HS: return ARMCC::LS; | |
2015 | case ARMCC::LO: return ARMCC::HI; | |
2016 | case ARMCC::HI: return ARMCC::LO; | |
2017 | case ARMCC::LS: return ARMCC::HS; | |
2018 | case ARMCC::GE: return ARMCC::LE; | |
2019 | case ARMCC::LT: return ARMCC::GT; | |
2020 | case ARMCC::GT: return ARMCC::LT; | |
2021 | case ARMCC::LE: return ARMCC::GE; | |
2022 | } | |
2023 | } | |
2024 | ||
2025 | /// isRedundantFlagInstr - check whether the first instruction, whose only | |
2026 | /// purpose is to update flags, can be made redundant. | |
2027 | /// CMPrr can be made redundant by SUBrr if the operands are the same. | |
2028 | /// CMPri can be made redundant by SUBri if the operands are the same. | |
2029 | /// This function can be extended later on. | |
2030 | inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, | |
2031 | unsigned SrcReg2, int ImmValue, | |
2032 | MachineInstr *OI) { | |
2033 | if ((CmpI->getOpcode() == ARM::CMPrr || | |
2034 | CmpI->getOpcode() == ARM::t2CMPrr) && | |
2035 | (OI->getOpcode() == ARM::SUBrr || | |
2036 | OI->getOpcode() == ARM::t2SUBrr) && | |
2037 | ((OI->getOperand(1).getReg() == SrcReg && | |
2038 | OI->getOperand(2).getReg() == SrcReg2) || | |
2039 | (OI->getOperand(1).getReg() == SrcReg2 && | |
2040 | OI->getOperand(2).getReg() == SrcReg))) | |
2041 | return true; | |
2042 | ||
2043 | if ((CmpI->getOpcode() == ARM::CMPri || | |
2044 | CmpI->getOpcode() == ARM::t2CMPri) && | |
2045 | (OI->getOpcode() == ARM::SUBri || | |
2046 | OI->getOpcode() == ARM::t2SUBri) && | |
2047 | OI->getOperand(1).getReg() == SrcReg && | |
2048 | OI->getOperand(2).getImm() == ImmValue) | |
2049 | return true; | |
2050 | return false; | |
2051 | } | |
2052 | ||
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
/// flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
                     int CmpMask, int CmpValue,
                     const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
      // The def itself isn't a matching AND; look for an AND of SrcReg with
      // the same mask among its uses in the compare's block.
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *Sub = NULL;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = NULL;
  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri, we need to check Sub, thus we can't return here.
    if (CmpInstr->getOpcode() == ARM::CMPri ||
        CmpInstr->getOpcode() == ARM::t2CMPri)
      MI = NULL;
    else
      return false;
  }

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change. At the same time, search for Sub.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
        Instr.readsRegister(ARM::CPSR, TRI))
      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  // We can't use a predicated instruction - it doesn't always write the flags.
  if (isPredicated(MI))
    return false;

  // Only the opcodes listed below are known to have an optional CPSR def as
  // operand 5, which is what gets toggled at the end.
  switch (MI->getOpcode()) {
  default: break;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri: {
    // Scan forward for the use of CPSR
    // When checking against MI: if it's a conditional code requires
    // checking of V bit, then this is not safe to do.
    // It is safe to remove CmpInstr if CPSR is redefined or killed.
    // If we are done with the basic block, we need to check whether CPSR is
    // live-out.
    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
        OperandsToUpdate;
    bool isSafe = false;
    I = CmpInstr;
    E = CmpInstr->getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands();
           !isSafe && IO != EO; ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        // A call-clobber regmask covering CPSR ends its live range.
        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
          isSafe = true;
          break;
        }
        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before CPSR.
        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
        if (Sub) {
          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
          if (NewCC == ARMCC::AL)
            return false;
          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
          // on CMP needs to be updated to be based on SUB.
          // Push the condition code operands to OperandsToUpdate.
          // If it is safe to remove CmpInstr, the condition code of these
          // operands will be modified.
          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
              Sub->getOperand(2).getReg() == SrcReg)
            OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
                                                      NewCC));
        }
        else
          switch (CC) {
          default:
            // CPSR can be used multiple times, we should continue.
            break;
          case ARMCC::VS:
          case ARMCC::VC:
          case ARMCC::GE:
          case ARMCC::LT:
          case ARMCC::GT:
          case ARMCC::LE:
            // These conditions read the V (or full signed) flags, which MI's
            // S-form may set differently than CMP; bail out.
            return false;
          }
      }
    }

    // If CPSR is not killed nor re-defined, we should check whether it is
    // live-out. If it is live-out, do not optimize.
    if (!isSafe) {
      MachineBasicBlock *MBB = CmpInstr->getParent();
      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
           SE = MBB->succ_end(); SI != SE; ++SI)
        if ((*SI)->isLiveIn(ARM::CPSR))
          return false;
    }

    // Toggle the optional operand to CPSR.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
    CmpInstr->eraseFromParent();

    // Modify the condition code of operands in OperandsToUpdate.
    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
    return true;
  }
  }

  return false;
}
2267 | ||
/// FoldImmediate - Try to fold the 32-bit immediate materialized by DefMI
/// (a MOVi32imm / t2MOVi32imm whose only non-debug user is UseMI) directly
/// into UseMI. The immediate must be expressible as a "two part" so_imm:
/// the fold splits UseMI into a new reg/imm instruction computing the first
/// part and rewrites UseMI to apply the second part, then erases DefMI.
/// Returns true iff the fold was performed. UseMI's opcode changes but the
/// defined register is unchanged, so callers' use lists remain valid.
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
                                     MachineInstr *DefMI, unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI->getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    return false;
  if (!DefMI->getOperand(1).isImm())
    // Could be t2MOVi32imm <ga:xx> — a global address, not a literal.
    return false;

  // Only safe to erase DefMI if UseMI is its sole (non-debug) consumer.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI->getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI->getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI->getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    // If the immediate is the first source operand the (commutative) ops can
    // still be folded by swapping; SUB cannot be commuted.
    Commute = UseMI->getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::SUBrr: {
      if (Commute)
        return false;
      // Fold "sub rd, rn, #imm" as "add rd, rn, #-imm" so the two-part
      // immediate machinery below (shared with ADD/ORR/EOR) applies.
      ImmVal = -ImmVal;
      NewUseOpc = ARM::SUBri;
      // Fallthrough
    }
    case ARM::ADDrr:
    case ARM::ORRrr:
    case ARM::EORrr: {
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      // SUBrr already chose its opcode above; the inner default leaves it.
      switch (UseOpc) {
      default: break;
      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    }
    case ARM::t2SUBrr: {
      if (Commute)
        return false;
      // Same negate-and-add trick as the ARM-mode SUB above.
      ImmVal = -ImmVal;
      NewUseOpc = ARM::t2SUBri;
      // Fallthrough
    }
    case ARM::t2ADDrr:
    case ARM::t2ORRrr:
    case ARM::t2EORrr: {
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
    }
  }
  }

  // OpIdx is the register source operand that is NOT the folded immediate.
  unsigned OpIdx = Commute ? 2 : 1;
  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
  bool isKill = UseMI->getOperand(OpIdx).isKill();
  // Emit: NewReg = Reg1 <op> SOImmValV1, then rewrite UseMI in place to
  // compute NewReg <op> SOImmValV2 into its original destination.
  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
                                      UseMI, UseMI->getDebugLoc(),
                                      get(NewUseOpc), NewReg)
                              .addReg(Reg1, getKillRegState(isKill))
                              .addImm(SOImmValV1)));
  UseMI->setDesc(get(NewUseOpc));
  UseMI->getOperand(1).setReg(NewReg);
  UseMI->getOperand(1).setIsKill();
  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI->eraseFromParent();
  return true;
}
2383 | ||
/// getNumMicroOpsSwiftLdSt - Swift-specific micro-op counts for load / store
/// instructions. The itinerary supplies a baseline count; this refines it per
/// opcode based on the addressing mode (shift kind/amount, add vs. subtract,
/// pre/post increment) and on register-operand hazards (e.g. Rt == Rm on
/// post-indexed forms). The exact constants encode Swift's decoder behavior;
/// they are taken on trust from the scheduling model, not derivable here.
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: {
    // No special case: fall back to the itinerary's micro-op count.
    const MCInstrDesc &Desc = MI->getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    // Register-offset forms: an add with no shift, or an LSL by 1-3, is a
    // single uop; any other shift or a subtract needs an extra address uop.
    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    // Immediate-offset form (no offset register) is a single uop.
    if (!MI->getOperand(2).getReg())
      return 1;

    // Otherwise the same cheap-shift rule as LDRrs above applies.
    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    // Signed narrow loads: subtracting address mode costs one more uop.
    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    // Post-indexed: destination aliasing the index register adds a uop.
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    // Immediate offset (no Rm) is cheapest; Rt == Rm or a subtract costs
    // an extra uop.
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
      ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    // Simple pre/post-indexed immediate (and remaining register) forms.
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    // Pre-indexed signed loads are the most expensive single loads:
    // base 3 uops, +1 for Rt == Rm, +1 for a non-trivial shift/subtract.
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    unsigned Rt = MI->getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRSBs:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSBT:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
  case ARM::t2LDRSHs:
    // Thumb2 loads with writeback / sign-extension: a flat 2 uops.
    return 2;

  case ARM::t2LDRDi8: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRBs:
  case ARM::t2STRDi8:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STRHs:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
  case ARM::t2STRs:
    return 2;
  }
}
2606 | ||
2607 | // Return the number of 32-bit words loaded by LDM or stored by STM. If this | |
2608 | // can't be easily determined return 0 (missing MachineMemOperand). | |
2609 | // | |
2610 | // FIXME: The current MachineInstr design does not support relying on machine | |
2611 | // mem operands to determine the width of a memory access. Instead, we expect | |
2612 | // the target to provide this information based on the instruction opcode and | |
2613 | // operands. However, using MachineMemOperand is the best solution now for | |
2614 | // two reasons: | |
2615 | // | |
2616 | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI | |
2617 | // operands. This is much more dangerous than using the MachineMemOperand | |
2618 | // sizes because CodeGen passes can insert/remove optional machine operands. In | |
2619 | // fact, it's totally incorrect for preRA passes and appears to be wrong for | |
2620 | // postRA passes as well. | |
2621 | // | |
2622 | // 2) getNumLDMAddresses is only used by the scheduling machine model and any | |
2623 | // machine model that calls this should handle the unknown (zero size) case. | |
2624 | // | |
2625 | // Long term, we should require a target hook that verifies MachineMemOperand | |
2626 | // sizes during MC lowering. That target hook should be local to MC lowering | |
2627 | // because we can't ensure that it is aware of other MI forms. Doing this will | |
2628 | // ensure that MachineMemOperands are correctly propagated through all passes. | |
2629 | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const { | |
2630 | unsigned Size = 0; | |
2631 | for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), | |
2632 | E = MI->memoperands_end(); I != E; ++I) { | |
2633 | Size += (*I)->getSize(); | |
2634 | } | |
2635 | return Size / 4; | |
2636 | } | |
2637 | ||
/// getNumMicroOps - Return the micro-op count for MI. Uses the itinerary
/// when it has a fixed answer (refined for Swift loads/stores); otherwise
/// computes the count for load/store-multiple instructions from the length
/// of their variable register list, per subtarget.
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    // Swift refines the itinerary count for memory ops (addressing-mode and
    // register-hazard dependent); everyone else takes the itinerary as-is.
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  // Negative itinerary count: variable-uop instruction, handled below.
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // The register list is the variadic tail beyond the fixed operands.
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isSwift()) {
      // Swift: one uop for address computation plus one per register, plus
      // extras for writeback and (for return forms) the write to pc.
      int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
      switch (Opc) {
      default: break;
      case ARM::VLDMDIA_UPD:
      case ARM::VLDMDDB_UPD:
      case ARM::VLDMSIA_UPD:
      case ARM::VLDMSDB_UPD:
      case ARM::VSTMDIA_UPD:
      case ARM::VSTMDDB_UPD:
      case ARM::VSTMSIA_UPD:
      case ARM::VSTMSDB_UPD:
      case ARM::LDMIA_UPD:
      case ARM::LDMDA_UPD:
      case ARM::LDMDB_UPD:
      case ARM::LDMIB_UPD:
      case ARM::STMIA_UPD:
      case ARM::STMDA_UPD:
      case ARM::STMDB_UPD:
      case ARM::STMIB_UPD:
      case ARM::tLDMIA_UPD:
      case ARM::tSTMIA_UPD:
      case ARM::t2LDMIA_UPD:
      case ARM::t2LDMDB_UPD:
      case ARM::t2STMIA_UPD:
      case ARM::t2STMDB_UPD:
        ++UOps; // One for base register writeback.
        break;
      case ARM::LDMIA_RET:
      case ARM::tPOP_RET:
      case ARM::t2LDMIA_RET:
        UOps += 2; // One for base reg wb, one for write to pc.
        break;
      }
      return UOps;
    } else if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      int A8UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++A8UOps;
      return A8UOps;
    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
      // NOTE(review): the isSwift() test here looks unreachable — Swift
      // already returned in the first branch above. Verify before removing.
      int A9UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++A9UOps;
      return A9UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}
2781 | ||
2782 | int | |
2783 | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, | |
2784 | const MCInstrDesc &DefMCID, | |
2785 | unsigned DefClass, | |
2786 | unsigned DefIdx, unsigned DefAlign) const { | |
2787 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; | |
2788 | if (RegNo <= 0) | |
2789 | // Def is the address writeback. | |
2790 | return ItinData->getOperandCycle(DefClass, DefIdx); | |
2791 | ||
2792 | int DefCycle; | |
2793 | if (Subtarget.isCortexA8()) { | |
2794 | // (regno / 2) + (regno % 2) + 1 | |
2795 | DefCycle = RegNo / 2 + 1; | |
2796 | if (RegNo % 2) | |
2797 | ++DefCycle; | |
2798 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { | |
2799 | DefCycle = RegNo; | |
2800 | bool isSLoad = false; | |
2801 | ||
2802 | switch (DefMCID.getOpcode()) { | |
2803 | default: break; | |
2804 | case ARM::VLDMSIA: | |
2805 | case ARM::VLDMSIA_UPD: | |
2806 | case ARM::VLDMSDB_UPD: | |
2807 | isSLoad = true; | |
2808 | break; | |
2809 | } | |
2810 | ||
2811 | // If there are odd number of 'S' registers or if it's not 64-bit aligned, | |
2812 | // then it takes an extra cycle. | |
2813 | if ((isSLoad && (RegNo % 2)) || DefAlign < 8) | |
2814 | ++DefCycle; | |
2815 | } else { | |
2816 | // Assume the worst. | |
2817 | DefCycle = RegNo + 2; | |
2818 | } | |
2819 | ||
2820 | return DefCycle; | |
2821 | } | |
2822 | ||
2823 | int | |
2824 | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, | |
2825 | const MCInstrDesc &DefMCID, | |
2826 | unsigned DefClass, | |
2827 | unsigned DefIdx, unsigned DefAlign) const { | |
2828 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; | |
2829 | if (RegNo <= 0) | |
2830 | // Def is the address writeback. | |
2831 | return ItinData->getOperandCycle(DefClass, DefIdx); | |
2832 | ||
2833 | int DefCycle; | |
2834 | if (Subtarget.isCortexA8()) { | |
2835 | // 4 registers would be issued: 1, 2, 1. | |
2836 | // 5 registers would be issued: 1, 2, 2. | |
2837 | DefCycle = RegNo / 2; | |
2838 | if (DefCycle < 1) | |
2839 | DefCycle = 1; | |
2840 | // Result latency is issue cycle + 2: E2. | |
2841 | DefCycle += 2; | |
2842 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { | |
2843 | DefCycle = (RegNo / 2); | |
2844 | // If there are odd number of registers or if it's not 64-bit aligned, | |
2845 | // then it takes an extra AGU (Address Generation Unit) cycle. | |
2846 | if ((RegNo % 2) || DefAlign < 8) | |
2847 | ++DefCycle; | |
2848 | // Result latency is AGU cycles + 2. | |
2849 | DefCycle += 2; | |
2850 | } else { | |
2851 | // Assume the worst. | |
2852 | DefCycle = RegNo + 2; | |
2853 | } | |
2854 | ||
2855 | return DefCycle; | |
2856 | } | |
2857 | ||
2858 | int | |
2859 | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, | |
2860 | const MCInstrDesc &UseMCID, | |
2861 | unsigned UseClass, | |
2862 | unsigned UseIdx, unsigned UseAlign) const { | |
2863 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; | |
2864 | if (RegNo <= 0) | |
2865 | return ItinData->getOperandCycle(UseClass, UseIdx); | |
2866 | ||
2867 | int UseCycle; | |
2868 | if (Subtarget.isCortexA8()) { | |
2869 | // (regno / 2) + (regno % 2) + 1 | |
2870 | UseCycle = RegNo / 2 + 1; | |
2871 | if (RegNo % 2) | |
2872 | ++UseCycle; | |
2873 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { | |
2874 | UseCycle = RegNo; | |
2875 | bool isSStore = false; | |
2876 | ||
2877 | switch (UseMCID.getOpcode()) { | |
2878 | default: break; | |
2879 | case ARM::VSTMSIA: | |
2880 | case ARM::VSTMSIA_UPD: | |
2881 | case ARM::VSTMSDB_UPD: | |
2882 | isSStore = true; | |
2883 | break; | |
2884 | } | |
2885 | ||
2886 | // If there are odd number of 'S' registers or if it's not 64-bit aligned, | |
2887 | // then it takes an extra cycle. | |
2888 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) | |
2889 | ++UseCycle; | |
2890 | } else { | |
2891 | // Assume the worst. | |
2892 | UseCycle = RegNo + 2; | |
2893 | } | |
2894 | ||
2895 | return UseCycle; | |
2896 | } | |
2897 | ||
2898 | int | |
2899 | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, | |
2900 | const MCInstrDesc &UseMCID, | |
2901 | unsigned UseClass, | |
2902 | unsigned UseIdx, unsigned UseAlign) const { | |
2903 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; | |
2904 | if (RegNo <= 0) | |
2905 | return ItinData->getOperandCycle(UseClass, UseIdx); | |
2906 | ||
2907 | int UseCycle; | |
2908 | if (Subtarget.isCortexA8()) { | |
2909 | UseCycle = RegNo / 2; | |
2910 | if (UseCycle < 2) | |
2911 | UseCycle = 2; | |
2912 | // Read in E3. | |
2913 | UseCycle += 2; | |
2914 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { | |
2915 | UseCycle = (RegNo / 2); | |
2916 | // If there are odd number of registers or if it's not 64-bit aligned, | |
2917 | // then it takes an extra AGU (Address Generation Unit) cycle. | |
2918 | if ((RegNo % 2) || UseAlign < 8) | |
2919 | ++UseCycle; | |
2920 | } else { | |
2921 | // Assume the worst. | |
2922 | UseCycle = 1; | |
2923 | } | |
2924 | return UseCycle; | |
2925 | } | |
2926 | ||
2927 | int | |
2928 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, | |
2929 | const MCInstrDesc &DefMCID, | |
2930 | unsigned DefIdx, unsigned DefAlign, | |
2931 | const MCInstrDesc &UseMCID, | |
2932 | unsigned UseIdx, unsigned UseAlign) const { | |
2933 | unsigned DefClass = DefMCID.getSchedClass(); | |
2934 | unsigned UseClass = UseMCID.getSchedClass(); | |
2935 | ||
2936 | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) | |
2937 | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); | |
2938 | ||
2939 | // This may be a def / use of a variable_ops instruction, the operand | |
2940 | // latency might be determinable dynamically. Let the target try to | |
2941 | // figure it out. | |
2942 | int DefCycle = -1; | |
2943 | bool LdmBypass = false; | |
2944 | switch (DefMCID.getOpcode()) { | |
2945 | default: | |
2946 | DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); | |
2947 | break; | |
2948 | ||
2949 | case ARM::VLDMDIA: | |
2950 | case ARM::VLDMDIA_UPD: | |
2951 | case ARM::VLDMDDB_UPD: | |
2952 | case ARM::VLDMSIA: | |
2953 | case ARM::VLDMSIA_UPD: | |
2954 | case ARM::VLDMSDB_UPD: | |
2955 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); | |
2956 | break; | |
2957 | ||
2958 | case ARM::LDMIA_RET: | |
2959 | case ARM::LDMIA: | |
2960 | case ARM::LDMDA: | |
2961 | case ARM::LDMDB: | |
2962 | case ARM::LDMIB: | |
2963 | case ARM::LDMIA_UPD: | |
2964 | case ARM::LDMDA_UPD: | |
2965 | case ARM::LDMDB_UPD: | |
2966 | case ARM::LDMIB_UPD: | |
2967 | case ARM::tLDMIA: | |
2968 | case ARM::tLDMIA_UPD: | |
2969 | case ARM::tPUSH: | |
2970 | case ARM::t2LDMIA_RET: | |
2971 | case ARM::t2LDMIA: | |
2972 | case ARM::t2LDMDB: | |
2973 | case ARM::t2LDMIA_UPD: | |
2974 | case ARM::t2LDMDB_UPD: | |
2975 | LdmBypass = 1; | |
2976 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); | |
2977 | break; | |
2978 | } | |
2979 | ||
2980 | if (DefCycle == -1) | |
2981 | // We can't seem to determine the result latency of the def, assume it's 2. | |
2982 | DefCycle = 2; | |
2983 | ||
2984 | int UseCycle = -1; | |
2985 | switch (UseMCID.getOpcode()) { | |
2986 | default: | |
2987 | UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); | |
2988 | break; | |
2989 | ||
2990 | case ARM::VSTMDIA: | |
2991 | case ARM::VSTMDIA_UPD: | |
2992 | case ARM::VSTMDDB_UPD: | |
2993 | case ARM::VSTMSIA: | |
2994 | case ARM::VSTMSIA_UPD: | |
2995 | case ARM::VSTMSDB_UPD: | |
2996 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); | |
2997 | break; | |
2998 | ||
2999 | case ARM::STMIA: | |
3000 | case ARM::STMDA: | |
3001 | case ARM::STMDB: | |
3002 | case ARM::STMIB: | |
3003 | case ARM::STMIA_UPD: | |
3004 | case ARM::STMDA_UPD: | |
3005 | case ARM::STMDB_UPD: | |
3006 | case ARM::STMIB_UPD: | |
3007 | case ARM::tSTMIA_UPD: | |
3008 | case ARM::tPOP_RET: | |
3009 | case ARM::tPOP: | |
3010 | case ARM::t2STMIA: | |
3011 | case ARM::t2STMDB: | |
3012 | case ARM::t2STMIA_UPD: | |
3013 | case ARM::t2STMDB_UPD: | |
3014 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); | |
3015 | break; | |
3016 | } | |
3017 | ||
3018 | if (UseCycle == -1) | |
3019 | // Assume it's read in the first stage. | |
3020 | UseCycle = 1; | |
3021 | ||
3022 | UseCycle = DefCycle - UseCycle + 1; | |
3023 | if (UseCycle > 0) { | |
3024 | if (LdmBypass) { | |
3025 | // It's a variable_ops instruction so we can't use DefIdx here. Just use | |
3026 | // first def operand. | |
3027 | if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, | |
3028 | UseClass, UseIdx)) | |
3029 | --UseCycle; | |
3030 | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, | |
3031 | UseClass, UseIdx)) { | |
3032 | --UseCycle; | |
3033 | } | |
3034 | } | |
3035 | ||
3036 | return UseCycle; | |
3037 | } | |
3038 | ||
/// getBundledDefMI - Given the header MI of an instruction bundle, walk
/// backwards through the bundled instructions to find the one that defines
/// Reg. Returns that instruction, sets DefIdx to the defining operand's
/// index within it, and Dist to the number of bundled instructions stepped
/// over. Asserts if the bundle is empty or contains no def of Reg.
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  // Advance past the bundle at the bundle-iterator level, then step back
  // one instruction: II is the last instruction inside MI's bundle.
  MachineBasicBlock::const_iterator I = MI; ++I;
  MachineBasicBlock::const_instr_iterator II =
    llvm::prior(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  // Scan backwards until the def of Reg is found (overlapping defs count,
  // per findRegisterDefOperandIdx(Reg, /*isDead=*/false, /*Overlap=*/true)).
  int Idx = -1;
  while (II->isInsideBundle()) {
    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return II;
}
3062 | ||
/// getBundledUseMI - Given an instruction MI at the head of a bundle, walk
/// forwards through the bundled instructions to find the first one that uses
/// Reg. Returns that instruction with UseIdx set to the using operand's
/// index and Dist to the number of counted instructions skipped (t2IT does
/// not count). Returns null (with Dist reset to 0) if no use is found.
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  // Start at the first instruction bundled after MI.
  MachineBasicBlock::const_instr_iterator II = MI; ++II;
  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    if (Idx != -1)
      break;
    // IT instructions do not issue separately, so don't count them.
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  if (Idx == -1) {
    Dist = 0;
    return 0;
  }

  UseIdx = Idx;
  return II;
}
3091 | ||
3092 | /// Return the number of cycles to add to (or subtract from) the static | |
3093 | /// itinerary based on the def opcode and alignment. The caller will ensure that | |
3094 | /// adjusted latency is at least one cycle. | |
3095 | static int adjustDefLatency(const ARMSubtarget &Subtarget, | |
3096 | const MachineInstr *DefMI, | |
3097 | const MCInstrDesc *DefMCID, unsigned DefAlign) { | |
3098 | int Adjust = 0; | |
3099 | if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { | |
3100 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] | |
3101 | // variants are one cycle cheaper. | |
3102 | switch (DefMCID->getOpcode()) { | |
3103 | default: break; | |
3104 | case ARM::LDRrs: | |
3105 | case ARM::LDRBrs: { | |
3106 | unsigned ShOpVal = DefMI->getOperand(3).getImm(); | |
3107 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); | |
3108 | if (ShImm == 0 || | |
3109 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) | |
3110 | --Adjust; | |
3111 | break; | |
3112 | } | |
3113 | case ARM::t2LDRs: | |
3114 | case ARM::t2LDRBs: | |
3115 | case ARM::t2LDRHs: | |
3116 | case ARM::t2LDRSHs: { | |
3117 | // Thumb2 mode: lsl only. | |
3118 | unsigned ShAmt = DefMI->getOperand(3).getImm(); | |
3119 | if (ShAmt == 0 || ShAmt == 2) | |
3120 | --Adjust; | |
3121 | break; | |
3122 | } | |
3123 | } | |
3124 | } else if (Subtarget.isSwift()) { | |
3125 | // FIXME: Properly handle all of the latency adjustments for address | |
3126 | // writeback. | |
3127 | switch (DefMCID->getOpcode()) { | |
3128 | default: break; | |
3129 | case ARM::LDRrs: | |
3130 | case ARM::LDRBrs: { | |
3131 | unsigned ShOpVal = DefMI->getOperand(3).getImm(); | |
3132 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; | |
3133 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); | |
3134 | if (!isSub && | |
3135 | (ShImm == 0 || | |
3136 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && | |
3137 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) | |
3138 | Adjust -= 2; | |
3139 | else if (!isSub && | |
3140 | ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) | |
3141 | --Adjust; | |
3142 | break; | |
3143 | } | |
3144 | case ARM::t2LDRs: | |
3145 | case ARM::t2LDRBs: | |
3146 | case ARM::t2LDRHs: | |
3147 | case ARM::t2LDRSHs: { | |
3148 | // Thumb2 mode: lsl only. | |
3149 | unsigned ShAmt = DefMI->getOperand(3).getImm(); | |
3150 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) | |
3151 | Adjust -= 2; | |
3152 | break; | |
3153 | } | |
3154 | } | |
3155 | } | |
3156 | ||
3157 | if (DefAlign < 8 && Subtarget.isLikeA9()) { | |
3158 | switch (DefMCID->getOpcode()) { | |
3159 | default: break; | |
3160 | case ARM::VLD1q8: | |
3161 | case ARM::VLD1q16: | |
3162 | case ARM::VLD1q32: | |
3163 | case ARM::VLD1q64: | |
3164 | case ARM::VLD1q8wb_fixed: | |
3165 | case ARM::VLD1q16wb_fixed: | |
3166 | case ARM::VLD1q32wb_fixed: | |
3167 | case ARM::VLD1q64wb_fixed: | |
3168 | case ARM::VLD1q8wb_register: | |
3169 | case ARM::VLD1q16wb_register: | |
3170 | case ARM::VLD1q32wb_register: | |
3171 | case ARM::VLD1q64wb_register: | |
3172 | case ARM::VLD2d8: | |
3173 | case ARM::VLD2d16: | |
3174 | case ARM::VLD2d32: | |
3175 | case ARM::VLD2q8: | |
3176 | case ARM::VLD2q16: | |
3177 | case ARM::VLD2q32: | |
3178 | case ARM::VLD2d8wb_fixed: | |
3179 | case ARM::VLD2d16wb_fixed: | |
3180 | case ARM::VLD2d32wb_fixed: | |
3181 | case ARM::VLD2q8wb_fixed: | |
3182 | case ARM::VLD2q16wb_fixed: | |
3183 | case ARM::VLD2q32wb_fixed: | |
3184 | case ARM::VLD2d8wb_register: | |
3185 | case ARM::VLD2d16wb_register: | |
3186 | case ARM::VLD2d32wb_register: | |
3187 | case ARM::VLD2q8wb_register: | |
3188 | case ARM::VLD2q16wb_register: | |
3189 | case ARM::VLD2q32wb_register: | |
3190 | case ARM::VLD3d8: | |
3191 | case ARM::VLD3d16: | |
3192 | case ARM::VLD3d32: | |
3193 | case ARM::VLD1d64T: | |
3194 | case ARM::VLD3d8_UPD: | |
3195 | case ARM::VLD3d16_UPD: | |
3196 | case ARM::VLD3d32_UPD: | |
3197 | case ARM::VLD1d64Twb_fixed: | |
3198 | case ARM::VLD1d64Twb_register: | |
3199 | case ARM::VLD3q8_UPD: | |
3200 | case ARM::VLD3q16_UPD: | |
3201 | case ARM::VLD3q32_UPD: | |
3202 | case ARM::VLD4d8: | |
3203 | case ARM::VLD4d16: | |
3204 | case ARM::VLD4d32: | |
3205 | case ARM::VLD1d64Q: | |
3206 | case ARM::VLD4d8_UPD: | |
3207 | case ARM::VLD4d16_UPD: | |
3208 | case ARM::VLD4d32_UPD: | |
3209 | case ARM::VLD1d64Qwb_fixed: | |
3210 | case ARM::VLD1d64Qwb_register: | |
3211 | case ARM::VLD4q8_UPD: | |
3212 | case ARM::VLD4q16_UPD: | |
3213 | case ARM::VLD4q32_UPD: | |
3214 | case ARM::VLD1DUPq8: | |
3215 | case ARM::VLD1DUPq16: | |
3216 | case ARM::VLD1DUPq32: | |
3217 | case ARM::VLD1DUPq8wb_fixed: | |
3218 | case ARM::VLD1DUPq16wb_fixed: | |
3219 | case ARM::VLD1DUPq32wb_fixed: | |
3220 | case ARM::VLD1DUPq8wb_register: | |
3221 | case ARM::VLD1DUPq16wb_register: | |
3222 | case ARM::VLD1DUPq32wb_register: | |
3223 | case ARM::VLD2DUPd8: | |
3224 | case ARM::VLD2DUPd16: | |
3225 | case ARM::VLD2DUPd32: | |
3226 | case ARM::VLD2DUPd8wb_fixed: | |
3227 | case ARM::VLD2DUPd16wb_fixed: | |
3228 | case ARM::VLD2DUPd32wb_fixed: | |
3229 | case ARM::VLD2DUPd8wb_register: | |
3230 | case ARM::VLD2DUPd16wb_register: | |
3231 | case ARM::VLD2DUPd32wb_register: | |
3232 | case ARM::VLD4DUPd8: | |
3233 | case ARM::VLD4DUPd16: | |
3234 | case ARM::VLD4DUPd32: | |
3235 | case ARM::VLD4DUPd8_UPD: | |
3236 | case ARM::VLD4DUPd16_UPD: | |
3237 | case ARM::VLD4DUPd32_UPD: | |
3238 | case ARM::VLD1LNd8: | |
3239 | case ARM::VLD1LNd16: | |
3240 | case ARM::VLD1LNd32: | |
3241 | case ARM::VLD1LNd8_UPD: | |
3242 | case ARM::VLD1LNd16_UPD: | |
3243 | case ARM::VLD1LNd32_UPD: | |
3244 | case ARM::VLD2LNd8: | |
3245 | case ARM::VLD2LNd16: | |
3246 | case ARM::VLD2LNd32: | |
3247 | case ARM::VLD2LNq16: | |
3248 | case ARM::VLD2LNq32: | |
3249 | case ARM::VLD2LNd8_UPD: | |
3250 | case ARM::VLD2LNd16_UPD: | |
3251 | case ARM::VLD2LNd32_UPD: | |
3252 | case ARM::VLD2LNq16_UPD: | |
3253 | case ARM::VLD2LNq32_UPD: | |
3254 | case ARM::VLD4LNd8: | |
3255 | case ARM::VLD4LNd16: | |
3256 | case ARM::VLD4LNd32: | |
3257 | case ARM::VLD4LNq16: | |
3258 | case ARM::VLD4LNq32: | |
3259 | case ARM::VLD4LNd8_UPD: | |
3260 | case ARM::VLD4LNd16_UPD: | |
3261 | case ARM::VLD4LNd32_UPD: | |
3262 | case ARM::VLD4LNq16_UPD: | |
3263 | case ARM::VLD4LNq32_UPD: | |
3264 | // If the address is not 64-bit aligned, the latencies of these | |
3265 | // instructions increases by one. | |
3266 | ++Adjust; | |
3267 | break; | |
3268 | } | |
3269 | } | |
3270 | return Adjust; | |
3271 | } | |
3272 | ||
3273 | ||
3274 | ||
3275 | int | |
3276 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, | |
3277 | const MachineInstr *DefMI, unsigned DefIdx, | |
3278 | const MachineInstr *UseMI, | |
3279 | unsigned UseIdx) const { | |
3280 | // No operand latency. The caller may fall back to getInstrLatency. | |
3281 | if (!ItinData || ItinData->isEmpty()) | |
3282 | return -1; | |
3283 | ||
3284 | const MachineOperand &DefMO = DefMI->getOperand(DefIdx); | |
3285 | unsigned Reg = DefMO.getReg(); | |
3286 | const MCInstrDesc *DefMCID = &DefMI->getDesc(); | |
3287 | const MCInstrDesc *UseMCID = &UseMI->getDesc(); | |
3288 | ||
3289 | unsigned DefAdj = 0; | |
3290 | if (DefMI->isBundle()) { | |
3291 | DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); | |
3292 | DefMCID = &DefMI->getDesc(); | |
3293 | } | |
3294 | if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || | |
3295 | DefMI->isRegSequence() || DefMI->isImplicitDef()) { | |
3296 | return 1; | |
3297 | } | |
3298 | ||
3299 | unsigned UseAdj = 0; | |
3300 | if (UseMI->isBundle()) { | |
3301 | unsigned NewUseIdx; | |
3302 | const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, | |
3303 | Reg, NewUseIdx, UseAdj); | |
3304 | if (!NewUseMI) | |
3305 | return -1; | |
3306 | ||
3307 | UseMI = NewUseMI; | |
3308 | UseIdx = NewUseIdx; | |
3309 | UseMCID = &UseMI->getDesc(); | |
3310 | } | |
3311 | ||
3312 | if (Reg == ARM::CPSR) { | |
3313 | if (DefMI->getOpcode() == ARM::FMSTAT) { | |
3314 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) | |
3315 | return Subtarget.isLikeA9() ? 1 : 20; | |
3316 | } | |
3317 | ||
3318 | // CPSR set and branch can be paired in the same cycle. | |
3319 | if (UseMI->isBranch()) | |
3320 | return 0; | |
3321 | ||
3322 | // Otherwise it takes the instruction latency (generally one). | |
3323 | unsigned Latency = getInstrLatency(ItinData, DefMI); | |
3324 | ||
3325 | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to | |
3326 | // its uses. Instructions which are otherwise scheduled between them may | |
3327 | // incur a code size penalty (not able to use the CPSR setting 16-bit | |
3328 | // instructions). | |
3329 | if (Latency > 0 && Subtarget.isThumb2()) { | |
3330 | const MachineFunction *MF = DefMI->getParent()->getParent(); | |
970d7e83 LB |
3331 | if (MF->getFunction()->getAttributes(). |
3332 | hasAttribute(AttributeSet::FunctionIndex, | |
3333 | Attribute::OptimizeForSize)) | |
223e47cc LB |
3334 | --Latency; |
3335 | } | |
3336 | return Latency; | |
3337 | } | |
3338 | ||
3339 | if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) | |
3340 | return -1; | |
3341 | ||
3342 | unsigned DefAlign = DefMI->hasOneMemOperand() | |
3343 | ? (*DefMI->memoperands_begin())->getAlignment() : 0; | |
3344 | unsigned UseAlign = UseMI->hasOneMemOperand() | |
3345 | ? (*UseMI->memoperands_begin())->getAlignment() : 0; | |
3346 | ||
3347 | // Get the itinerary's latency if possible, and handle variable_ops. | |
3348 | int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, | |
3349 | *UseMCID, UseIdx, UseAlign); | |
3350 | // Unable to find operand latency. The caller may resort to getInstrLatency. | |
3351 | if (Latency < 0) | |
3352 | return Latency; | |
3353 | ||
3354 | // Adjust for IT block position. | |
3355 | int Adj = DefAdj + UseAdj; | |
3356 | ||
3357 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. | |
3358 | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); | |
3359 | if (Adj >= 0 || (int)Latency > -Adj) { | |
3360 | return Latency + Adj; | |
3361 | } | |
3362 | // Return the itinerary latency, which may be zero but not less than zero. | |
3363 | return Latency; | |
3364 | } | |
3365 | ||
3366 | int | |
3367 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, | |
3368 | SDNode *DefNode, unsigned DefIdx, | |
3369 | SDNode *UseNode, unsigned UseIdx) const { | |
3370 | if (!DefNode->isMachineOpcode()) | |
3371 | return 1; | |
3372 | ||
3373 | const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); | |
3374 | ||
3375 | if (isZeroCost(DefMCID.Opcode)) | |
3376 | return 0; | |
3377 | ||
3378 | if (!ItinData || ItinData->isEmpty()) | |
3379 | return DefMCID.mayLoad() ? 3 : 1; | |
3380 | ||
3381 | if (!UseNode->isMachineOpcode()) { | |
3382 | int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); | |
3383 | if (Subtarget.isLikeA9() || Subtarget.isSwift()) | |
3384 | return Latency <= 2 ? 1 : Latency - 1; | |
3385 | else | |
3386 | return Latency <= 3 ? 1 : Latency - 2; | |
3387 | } | |
3388 | ||
3389 | const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); | |
3390 | const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); | |
3391 | unsigned DefAlign = !DefMN->memoperands_empty() | |
3392 | ? (*DefMN->memoperands_begin())->getAlignment() : 0; | |
3393 | const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); | |
3394 | unsigned UseAlign = !UseMN->memoperands_empty() | |
3395 | ? (*UseMN->memoperands_begin())->getAlignment() : 0; | |
3396 | int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, | |
3397 | UseMCID, UseIdx, UseAlign); | |
3398 | ||
3399 | if (Latency > 1 && | |
3400 | (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { | |
3401 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] | |
3402 | // variants are one cycle cheaper. | |
3403 | switch (DefMCID.getOpcode()) { | |
3404 | default: break; | |
3405 | case ARM::LDRrs: | |
3406 | case ARM::LDRBrs: { | |
3407 | unsigned ShOpVal = | |
3408 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); | |
3409 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); | |
3410 | if (ShImm == 0 || | |
3411 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) | |
3412 | --Latency; | |
3413 | break; | |
3414 | } | |
3415 | case ARM::t2LDRs: | |
3416 | case ARM::t2LDRBs: | |
3417 | case ARM::t2LDRHs: | |
3418 | case ARM::t2LDRSHs: { | |
3419 | // Thumb2 mode: lsl only. | |
3420 | unsigned ShAmt = | |
3421 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); | |
3422 | if (ShAmt == 0 || ShAmt == 2) | |
3423 | --Latency; | |
3424 | break; | |
3425 | } | |
3426 | } | |
3427 | } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { | |
3428 | // FIXME: Properly handle all of the latency adjustments for address | |
3429 | // writeback. | |
3430 | switch (DefMCID.getOpcode()) { | |
3431 | default: break; | |
3432 | case ARM::LDRrs: | |
3433 | case ARM::LDRBrs: { | |
3434 | unsigned ShOpVal = | |
3435 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); | |
3436 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); | |
3437 | if (ShImm == 0 || | |
3438 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && | |
3439 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) | |
3440 | Latency -= 2; | |
3441 | else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) | |
3442 | --Latency; | |
3443 | break; | |
3444 | } | |
3445 | case ARM::t2LDRs: | |
3446 | case ARM::t2LDRBs: | |
3447 | case ARM::t2LDRHs: | |
3448 | case ARM::t2LDRSHs: { | |
3449 | // Thumb2 mode: lsl 0-3 only. | |
3450 | Latency -= 2; | |
3451 | break; | |
3452 | } | |
3453 | } | |
3454 | } | |
3455 | ||
3456 | if (DefAlign < 8 && Subtarget.isLikeA9()) | |
3457 | switch (DefMCID.getOpcode()) { | |
3458 | default: break; | |
3459 | case ARM::VLD1q8: | |
3460 | case ARM::VLD1q16: | |
3461 | case ARM::VLD1q32: | |
3462 | case ARM::VLD1q64: | |
3463 | case ARM::VLD1q8wb_register: | |
3464 | case ARM::VLD1q16wb_register: | |
3465 | case ARM::VLD1q32wb_register: | |
3466 | case ARM::VLD1q64wb_register: | |
3467 | case ARM::VLD1q8wb_fixed: | |
3468 | case ARM::VLD1q16wb_fixed: | |
3469 | case ARM::VLD1q32wb_fixed: | |
3470 | case ARM::VLD1q64wb_fixed: | |
3471 | case ARM::VLD2d8: | |
3472 | case ARM::VLD2d16: | |
3473 | case ARM::VLD2d32: | |
3474 | case ARM::VLD2q8Pseudo: | |
3475 | case ARM::VLD2q16Pseudo: | |
3476 | case ARM::VLD2q32Pseudo: | |
3477 | case ARM::VLD2d8wb_fixed: | |
3478 | case ARM::VLD2d16wb_fixed: | |
3479 | case ARM::VLD2d32wb_fixed: | |
3480 | case ARM::VLD2q8PseudoWB_fixed: | |
3481 | case ARM::VLD2q16PseudoWB_fixed: | |
3482 | case ARM::VLD2q32PseudoWB_fixed: | |
3483 | case ARM::VLD2d8wb_register: | |
3484 | case ARM::VLD2d16wb_register: | |
3485 | case ARM::VLD2d32wb_register: | |
3486 | case ARM::VLD2q8PseudoWB_register: | |
3487 | case ARM::VLD2q16PseudoWB_register: | |
3488 | case ARM::VLD2q32PseudoWB_register: | |
3489 | case ARM::VLD3d8Pseudo: | |
3490 | case ARM::VLD3d16Pseudo: | |
3491 | case ARM::VLD3d32Pseudo: | |
3492 | case ARM::VLD1d64TPseudo: | |
3493 | case ARM::VLD3d8Pseudo_UPD: | |
3494 | case ARM::VLD3d16Pseudo_UPD: | |
3495 | case ARM::VLD3d32Pseudo_UPD: | |
3496 | case ARM::VLD3q8Pseudo_UPD: | |
3497 | case ARM::VLD3q16Pseudo_UPD: | |
3498 | case ARM::VLD3q32Pseudo_UPD: | |
3499 | case ARM::VLD3q8oddPseudo: | |
3500 | case ARM::VLD3q16oddPseudo: | |
3501 | case ARM::VLD3q32oddPseudo: | |
3502 | case ARM::VLD3q8oddPseudo_UPD: | |
3503 | case ARM::VLD3q16oddPseudo_UPD: | |
3504 | case ARM::VLD3q32oddPseudo_UPD: | |
3505 | case ARM::VLD4d8Pseudo: | |
3506 | case ARM::VLD4d16Pseudo: | |
3507 | case ARM::VLD4d32Pseudo: | |
3508 | case ARM::VLD1d64QPseudo: | |
3509 | case ARM::VLD4d8Pseudo_UPD: | |
3510 | case ARM::VLD4d16Pseudo_UPD: | |
3511 | case ARM::VLD4d32Pseudo_UPD: | |
3512 | case ARM::VLD4q8Pseudo_UPD: | |
3513 | case ARM::VLD4q16Pseudo_UPD: | |
3514 | case ARM::VLD4q32Pseudo_UPD: | |
3515 | case ARM::VLD4q8oddPseudo: | |
3516 | case ARM::VLD4q16oddPseudo: | |
3517 | case ARM::VLD4q32oddPseudo: | |
3518 | case ARM::VLD4q8oddPseudo_UPD: | |
3519 | case ARM::VLD4q16oddPseudo_UPD: | |
3520 | case ARM::VLD4q32oddPseudo_UPD: | |
3521 | case ARM::VLD1DUPq8: | |
3522 | case ARM::VLD1DUPq16: | |
3523 | case ARM::VLD1DUPq32: | |
3524 | case ARM::VLD1DUPq8wb_fixed: | |
3525 | case ARM::VLD1DUPq16wb_fixed: | |
3526 | case ARM::VLD1DUPq32wb_fixed: | |
3527 | case ARM::VLD1DUPq8wb_register: | |
3528 | case ARM::VLD1DUPq16wb_register: | |
3529 | case ARM::VLD1DUPq32wb_register: | |
3530 | case ARM::VLD2DUPd8: | |
3531 | case ARM::VLD2DUPd16: | |
3532 | case ARM::VLD2DUPd32: | |
3533 | case ARM::VLD2DUPd8wb_fixed: | |
3534 | case ARM::VLD2DUPd16wb_fixed: | |
3535 | case ARM::VLD2DUPd32wb_fixed: | |
3536 | case ARM::VLD2DUPd8wb_register: | |
3537 | case ARM::VLD2DUPd16wb_register: | |
3538 | case ARM::VLD2DUPd32wb_register: | |
3539 | case ARM::VLD4DUPd8Pseudo: | |
3540 | case ARM::VLD4DUPd16Pseudo: | |
3541 | case ARM::VLD4DUPd32Pseudo: | |
3542 | case ARM::VLD4DUPd8Pseudo_UPD: | |
3543 | case ARM::VLD4DUPd16Pseudo_UPD: | |
3544 | case ARM::VLD4DUPd32Pseudo_UPD: | |
3545 | case ARM::VLD1LNq8Pseudo: | |
3546 | case ARM::VLD1LNq16Pseudo: | |
3547 | case ARM::VLD1LNq32Pseudo: | |
3548 | case ARM::VLD1LNq8Pseudo_UPD: | |
3549 | case ARM::VLD1LNq16Pseudo_UPD: | |
3550 | case ARM::VLD1LNq32Pseudo_UPD: | |
3551 | case ARM::VLD2LNd8Pseudo: | |
3552 | case ARM::VLD2LNd16Pseudo: | |
3553 | case ARM::VLD2LNd32Pseudo: | |
3554 | case ARM::VLD2LNq16Pseudo: | |
3555 | case ARM::VLD2LNq32Pseudo: | |
3556 | case ARM::VLD2LNd8Pseudo_UPD: | |
3557 | case ARM::VLD2LNd16Pseudo_UPD: | |
3558 | case ARM::VLD2LNd32Pseudo_UPD: | |
3559 | case ARM::VLD2LNq16Pseudo_UPD: | |
3560 | case ARM::VLD2LNq32Pseudo_UPD: | |
3561 | case ARM::VLD4LNd8Pseudo: | |
3562 | case ARM::VLD4LNd16Pseudo: | |
3563 | case ARM::VLD4LNd32Pseudo: | |
3564 | case ARM::VLD4LNq16Pseudo: | |
3565 | case ARM::VLD4LNq32Pseudo: | |
3566 | case ARM::VLD4LNd8Pseudo_UPD: | |
3567 | case ARM::VLD4LNd16Pseudo_UPD: | |
3568 | case ARM::VLD4LNd32Pseudo_UPD: | |
3569 | case ARM::VLD4LNq16Pseudo_UPD: | |
3570 | case ARM::VLD4LNq32Pseudo_UPD: | |
3571 | // If the address is not 64-bit aligned, the latencies of these | |
3572 | // instructions increases by one. | |
3573 | ++Latency; | |
3574 | break; | |
3575 | } | |
3576 | ||
3577 | return Latency; | |
3578 | } | |
3579 | ||
223e47cc LB |
3580 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
3581 | const MachineInstr *MI, | |
3582 | unsigned *PredCost) const { | |
3583 | if (MI->isCopyLike() || MI->isInsertSubreg() || | |
3584 | MI->isRegSequence() || MI->isImplicitDef()) | |
3585 | return 1; | |
3586 | ||
3587 | // An instruction scheduler typically runs on unbundled instructions, however | |
3588 | // other passes may query the latency of a bundled instruction. | |
3589 | if (MI->isBundle()) { | |
3590 | unsigned Latency = 0; | |
3591 | MachineBasicBlock::const_instr_iterator I = MI; | |
3592 | MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); | |
3593 | while (++I != E && I->isInsideBundle()) { | |
3594 | if (I->getOpcode() != ARM::t2IT) | |
3595 | Latency += getInstrLatency(ItinData, I, PredCost); | |
3596 | } | |
3597 | return Latency; | |
3598 | } | |
3599 | ||
3600 | const MCInstrDesc &MCID = MI->getDesc(); | |
3601 | if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { | |
3602 | // When predicated, CPSR is an additional source operand for CPSR updating | |
3603 | // instructions, this apparently increases their latencies. | |
3604 | *PredCost = 1; | |
3605 | } | |
3606 | // Be sure to call getStageLatency for an empty itinerary in case it has a | |
3607 | // valid MinLatency property. | |
3608 | if (!ItinData) | |
3609 | return MI->mayLoad() ? 3 : 1; | |
3610 | ||
3611 | unsigned Class = MCID.getSchedClass(); | |
3612 | ||
3613 | // For instructions with variable uops, use uops as latency. | |
3614 | if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) | |
3615 | return getNumMicroOps(ItinData, MI); | |
3616 | ||
3617 | // For the common case, fall back on the itinerary's latency. | |
3618 | unsigned Latency = ItinData->getStageLatency(Class); | |
3619 | ||
3620 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. | |
3621 | unsigned DefAlign = MI->hasOneMemOperand() | |
3622 | ? (*MI->memoperands_begin())->getAlignment() : 0; | |
3623 | int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign); | |
3624 | if (Adj >= 0 || (int)Latency > -Adj) { | |
3625 | return Latency + Adj; | |
3626 | } | |
3627 | return Latency; | |
3628 | } | |
3629 | ||
3630 | int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, | |
3631 | SDNode *Node) const { | |
3632 | if (!Node->isMachineOpcode()) | |
3633 | return 1; | |
3634 | ||
3635 | if (!ItinData || ItinData->isEmpty()) | |
3636 | return 1; | |
3637 | ||
3638 | unsigned Opcode = Node->getMachineOpcode(); | |
3639 | switch (Opcode) { | |
3640 | default: | |
3641 | return ItinData->getStageLatency(get(Opcode).getSchedClass()); | |
3642 | case ARM::VLDMQIA: | |
3643 | case ARM::VSTMQIA: | |
3644 | return 2; | |
3645 | } | |
3646 | } | |
3647 | ||
3648 | bool ARMBaseInstrInfo:: | |
3649 | hasHighOperandLatency(const InstrItineraryData *ItinData, | |
3650 | const MachineRegisterInfo *MRI, | |
3651 | const MachineInstr *DefMI, unsigned DefIdx, | |
3652 | const MachineInstr *UseMI, unsigned UseIdx) const { | |
3653 | unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; | |
3654 | unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; | |
3655 | if (Subtarget.isCortexA8() && | |
3656 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) | |
3657 | // CortexA8 VFP instructions are not pipelined. | |
3658 | return true; | |
3659 | ||
3660 | // Hoist VFP / NEON instructions with 4 or higher latency. | |
3661 | int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, | |
3662 | /*FindMin=*/false); | |
3663 | if (Latency < 0) | |
3664 | Latency = getInstrLatency(ItinData, DefMI); | |
3665 | if (Latency <= 3) | |
3666 | return false; | |
3667 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || | |
3668 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; | |
3669 | } | |
3670 | ||
3671 | bool ARMBaseInstrInfo:: | |
3672 | hasLowDefLatency(const InstrItineraryData *ItinData, | |
3673 | const MachineInstr *DefMI, unsigned DefIdx) const { | |
3674 | if (!ItinData || ItinData->isEmpty()) | |
3675 | return false; | |
3676 | ||
3677 | unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; | |
3678 | if (DDomain == ARMII::DomainGeneral) { | |
3679 | unsigned DefClass = DefMI->getDesc().getSchedClass(); | |
3680 | int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); | |
3681 | return (DefCycle != -1 && DefCycle <= 2); | |
3682 | } | |
3683 | return false; | |
3684 | } | |
3685 | ||
3686 | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, | |
3687 | StringRef &ErrInfo) const { | |
3688 | if (convertAddSubFlagsOpcode(MI->getOpcode())) { | |
3689 | ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; | |
3690 | return false; | |
3691 | } | |
3692 | return true; | |
3693 | } | |
3694 | ||
3695 | bool | |
3696 | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, | |
3697 | unsigned &AddSubOpc, | |
3698 | bool &NegAcc, bool &HasLane) const { | |
3699 | DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); | |
3700 | if (I == MLxEntryMap.end()) | |
3701 | return false; | |
3702 | ||
3703 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; | |
3704 | MulOpc = Entry.MulOpc; | |
3705 | AddSubOpc = Entry.AddSubOpc; | |
3706 | NegAcc = Entry.NegAcc; | |
3707 | HasLane = Entry.HasLane; | |
3708 | return true; | |
3709 | } | |
3710 | ||
3711 | //===----------------------------------------------------------------------===// | |
3712 | // Execution domains. | |
3713 | //===----------------------------------------------------------------------===// | |
3714 | // | |
3715 | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, | |
3716 | // and some can go down both. The vmov instructions go down the VFP pipeline, | |
3717 | // but they can be changed to vorr equivalents that are executed by the NEON | |
3718 | // pipeline. | |
3719 | // | |
3720 | // We use the following execution domain numbering: | |
3721 | // | |
// Execution-domain numbering used by get/setExecutionDomain below.
enum ARMExeDomain {
  ExeGeneric = 0, // No pipeline preference.
  ExeVFP = 1,     // Runs on the VFP pipeline.
  ExeNEON = 2     // Runs on the NEON pipeline.
};
3727 | // | |
3728 | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h | |
3729 | // | |
3730 | std::pair<uint16_t, uint16_t> | |
3731 | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { | |
3732 | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON | |
3733 | // if they are not predicated. | |
3734 | if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) | |
3735 | return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); | |
3736 | ||
3737 | // A9-like cores are particularly picky about mixing the two and want these | |
3738 | // converted. | |
3739 | if (Subtarget.isLikeA9() && !isPredicated(MI) && | |
3740 | (MI->getOpcode() == ARM::VMOVRS || | |
3741 | MI->getOpcode() == ARM::VMOVSR || | |
3742 | MI->getOpcode() == ARM::VMOVS)) | |
3743 | return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); | |
3744 | ||
3745 | // No other instructions can be swizzled, so just determine their domain. | |
3746 | unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; | |
3747 | ||
3748 | if (Domain & ARMII::DomainNEON) | |
3749 | return std::make_pair(ExeNEON, 0); | |
3750 | ||
3751 | // Certain instructions can go either way on Cortex-A8. | |
3752 | // Treat them as NEON instructions. | |
3753 | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) | |
3754 | return std::make_pair(ExeNEON, 0); | |
3755 | ||
3756 | if (Domain & ARMII::DomainVFP) | |
3757 | return std::make_pair(ExeVFP, 0); | |
3758 | ||
3759 | return std::make_pair(ExeGeneric, 0); | |
3760 | } | |
3761 | ||
3762 | static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, | |
3763 | unsigned SReg, unsigned &Lane) { | |
3764 | unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); | |
3765 | Lane = 0; | |
3766 | ||
3767 | if (DReg != ARM::NoRegister) | |
3768 | return DReg; | |
3769 | ||
3770 | Lane = 1; | |
3771 | DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); | |
3772 | ||
3773 | assert(DReg && "S-register with no D super-register?"); | |
3774 | return DReg; | |
3775 | } | |
3776 | ||
970d7e83 | 3777 | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
223e47cc LB |
3778 | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
3779 | /// zero if no register needs to be defined as implicit-use. | |
3780 | /// | |
3781 | /// If the function cannot determine if an SPR should be marked implicit use or | |
3782 | /// not, it returns false. | |
3783 | /// | |
3784 | /// This function handles cases where an instruction is being modified from taking | |
970d7e83 | 3785 | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
223e47cc LB |
3786 | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
3787 | /// lane of the DPR). | |
3788 | /// | |
3789 | /// If the other SPR is defined, an implicit-use of it should be added. Else, | |
3790 | /// (including the case where the DPR itself is defined), it should not. | |
970d7e83 | 3791 | /// |
223e47cc LB |
3792 | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
3793 | MachineInstr *MI, | |
3794 | unsigned DReg, unsigned Lane, | |
3795 | unsigned &ImplicitSReg) { | |
3796 | // If the DPR is defined or used already, the other SPR lane will be chained | |
3797 | // correctly, so there is nothing to be done. | |
3798 | if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) { | |
3799 | ImplicitSReg = 0; | |
3800 | return true; | |
3801 | } | |
3802 | ||
3803 | // Otherwise we need to go searching to see if the SPR is set explicitly. | |
3804 | ImplicitSReg = TRI->getSubReg(DReg, | |
3805 | (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); | |
3806 | MachineBasicBlock::LivenessQueryResult LQR = | |
3807 | MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); | |
3808 | ||
3809 | if (LQR == MachineBasicBlock::LQR_Live) | |
3810 | return true; | |
3811 | else if (LQR == MachineBasicBlock::LQR_Unknown) | |
3812 | return false; | |
3813 | ||
3814 | // If the register is known not to be live, there is no need to add an | |
3815 | // implicit-use. | |
3816 | ImplicitSReg = 0; | |
3817 | return true; | |
3818 | } | |
3819 | ||
3820 | void | |
3821 | ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { | |
3822 | unsigned DstReg, SrcReg, DReg; | |
3823 | unsigned Lane; | |
970d7e83 | 3824 | MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); |
223e47cc LB |
3825 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
3826 | switch (MI->getOpcode()) { | |
3827 | default: | |
3828 | llvm_unreachable("cannot handle opcode!"); | |
3829 | break; | |
3830 | case ARM::VMOVD: | |
3831 | if (Domain != ExeNEON) | |
3832 | break; | |
3833 | ||
3834 | // Zap the predicate operands. | |
3835 | assert(!isPredicated(MI) && "Cannot predicate a VORRd"); | |
3836 | ||
3837 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) | |
3838 | DstReg = MI->getOperand(0).getReg(); | |
3839 | SrcReg = MI->getOperand(1).getReg(); | |
3840 | ||
3841 | for (unsigned i = MI->getDesc().getNumOperands(); i; --i) | |
3842 | MI->RemoveOperand(i-1); | |
3843 | ||
3844 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) | |
3845 | MI->setDesc(get(ARM::VORRd)); | |
3846 | AddDefaultPred(MIB.addReg(DstReg, RegState::Define) | |
3847 | .addReg(SrcReg) | |
3848 | .addReg(SrcReg)); | |
3849 | break; | |
3850 | case ARM::VMOVRS: | |
3851 | if (Domain != ExeNEON) | |
3852 | break; | |
3853 | assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); | |
3854 | ||
3855 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) | |
3856 | DstReg = MI->getOperand(0).getReg(); | |
3857 | SrcReg = MI->getOperand(1).getReg(); | |
3858 | ||
3859 | for (unsigned i = MI->getDesc().getNumOperands(); i; --i) | |
3860 | MI->RemoveOperand(i-1); | |
3861 | ||
3862 | DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); | |
3863 | ||
3864 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) | |
3865 | // Note that DSrc has been widened and the other lane may be undef, which | |
3866 | // contaminates the entire register. | |
3867 | MI->setDesc(get(ARM::VGETLNi32)); | |
3868 | AddDefaultPred(MIB.addReg(DstReg, RegState::Define) | |
3869 | .addReg(DReg, RegState::Undef) | |
3870 | .addImm(Lane)); | |
3871 | ||
3872 | // The old source should be an implicit use, otherwise we might think it | |
3873 | // was dead before here. | |
3874 | MIB.addReg(SrcReg, RegState::Implicit); | |
3875 | break; | |
3876 | case ARM::VMOVSR: { | |
3877 | if (Domain != ExeNEON) | |
3878 | break; | |
3879 | assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); | |
3880 | ||
3881 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) | |
3882 | DstReg = MI->getOperand(0).getReg(); | |
3883 | SrcReg = MI->getOperand(1).getReg(); | |
3884 | ||
3885 | DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); | |
3886 | ||
3887 | unsigned ImplicitSReg; | |
3888 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) | |
3889 | break; | |
3890 | ||
3891 | for (unsigned i = MI->getDesc().getNumOperands(); i; --i) | |
3892 | MI->RemoveOperand(i-1); | |
3893 | ||
3894 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) | |
3895 | // Again DDst may be undefined at the beginning of this instruction. | |
3896 | MI->setDesc(get(ARM::VSETLNi32)); | |
3897 | MIB.addReg(DReg, RegState::Define) | |
3898 | .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) | |
3899 | .addReg(SrcReg) | |
3900 | .addImm(Lane); | |
3901 | AddDefaultPred(MIB); | |
3902 | ||
3903 | // The narrower destination must be marked as set to keep previous chains | |
3904 | // in place. | |
3905 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); | |
3906 | if (ImplicitSReg != 0) | |
3907 | MIB.addReg(ImplicitSReg, RegState::Implicit); | |
3908 | break; | |
3909 | } | |
3910 | case ARM::VMOVS: { | |
3911 | if (Domain != ExeNEON) | |
3912 | break; | |
3913 | ||
3914 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) | |
3915 | DstReg = MI->getOperand(0).getReg(); | |
3916 | SrcReg = MI->getOperand(1).getReg(); | |
3917 | ||
3918 | unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; | |
3919 | DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); | |
3920 | DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); | |
3921 | ||
3922 | unsigned ImplicitSReg; | |
3923 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) | |
3924 | break; | |
3925 | ||
3926 | for (unsigned i = MI->getDesc().getNumOperands(); i; --i) | |
3927 | MI->RemoveOperand(i-1); | |
3928 | ||
3929 | if (DSrc == DDst) { | |
3930 | // Destination can be: | |
3931 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) | |
3932 | MI->setDesc(get(ARM::VDUPLN32d)); | |
3933 | MIB.addReg(DDst, RegState::Define) | |
3934 | .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) | |
3935 | .addImm(SrcLane); | |
3936 | AddDefaultPred(MIB); | |
3937 | ||
3938 | // Neither the source or the destination are naturally represented any | |
3939 | // more, so add them in manually. | |
3940 | MIB.addReg(DstReg, RegState::Implicit | RegState::Define); | |
3941 | MIB.addReg(SrcReg, RegState::Implicit); | |
3942 | if (ImplicitSReg != 0) | |
3943 | MIB.addReg(ImplicitSReg, RegState::Implicit); | |
3944 | break; | |
3945 | } | |
3946 | ||
3947 | // In general there's no single instruction that can perform an S <-> S | |
3948 | // move in NEON space, but a pair of VEXT instructions *can* do the | |
3949 | // job. It turns out that the VEXTs needed will only use DSrc once, with | |
3950 | // the position based purely on the combination of lane-0 and lane-1 | |
3951 | // involved. For example | |
3952 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 | |
3953 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 | |
3954 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 | |
3955 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 | |
3956 | // | |
3957 | // Pattern of the MachineInstrs is: | |
3958 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) | |
3959 | MachineInstrBuilder NewMIB; | |
3960 | NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), | |
3961 | get(ARM::VEXTd32), DDst); | |
3962 | ||
3963 | // On the first instruction, both DSrc and DDst may be <undef> if present. | |
3964 | // Specifically when the original instruction didn't have them as an | |
3965 | // <imp-use>. | |
3966 | unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; | |
3967 | bool CurUndef = !MI->readsRegister(CurReg, TRI); | |
3968 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); | |
3969 | ||
3970 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; | |
3971 | CurUndef = !MI->readsRegister(CurReg, TRI); | |
3972 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); | |
3973 | ||
3974 | NewMIB.addImm(1); | |
3975 | AddDefaultPred(NewMIB); | |
3976 | ||
3977 | if (SrcLane == DstLane) | |
3978 | NewMIB.addReg(SrcReg, RegState::Implicit); | |
3979 | ||
3980 | MI->setDesc(get(ARM::VEXTd32)); | |
3981 | MIB.addReg(DDst, RegState::Define); | |
3982 | ||
3983 | // On the second instruction, DDst has definitely been defined above, so | |
3984 | // it is not <undef>. DSrc, if present, can be <undef> as above. | |
3985 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; | |
3986 | CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); | |
3987 | MIB.addReg(CurReg, getUndefRegState(CurUndef)); | |
3988 | ||
3989 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; | |
3990 | CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); | |
3991 | MIB.addReg(CurReg, getUndefRegState(CurUndef)); | |
3992 | ||
3993 | MIB.addImm(1); | |
3994 | AddDefaultPred(MIB); | |
3995 | ||
3996 | if (SrcLane != DstLane) | |
3997 | MIB.addReg(SrcReg, RegState::Implicit); | |
3998 | ||
3999 | // As before, the original destination is no longer represented, add it | |
4000 | // implicitly. | |
4001 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); | |
4002 | if (ImplicitSReg != 0) | |
4003 | MIB.addReg(ImplicitSReg, RegState::Implicit); | |
4004 | break; | |
4005 | } | |
4006 | } | |
4007 | ||
4008 | } | |
4009 | ||
//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
4027 | ||
4028 | unsigned ARMBaseInstrInfo:: | |
4029 | getPartialRegUpdateClearance(const MachineInstr *MI, | |
4030 | unsigned OpNum, | |
4031 | const TargetRegisterInfo *TRI) const { | |
4032 | // Only Swift has partial register update problems. | |
4033 | if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) | |
4034 | return 0; | |
4035 | ||
4036 | assert(TRI && "Need TRI instance"); | |
4037 | ||
4038 | const MachineOperand &MO = MI->getOperand(OpNum); | |
4039 | if (MO.readsReg()) | |
4040 | return 0; | |
4041 | unsigned Reg = MO.getReg(); | |
4042 | int UseOp = -1; | |
4043 | ||
4044 | switch(MI->getOpcode()) { | |
4045 | // Normal instructions writing only an S-register. | |
4046 | case ARM::VLDRS: | |
4047 | case ARM::FCONSTS: | |
4048 | case ARM::VMOVSR: | |
223e47cc LB |
4049 | case ARM::VMOVv8i8: |
4050 | case ARM::VMOVv4i16: | |
4051 | case ARM::VMOVv2i32: | |
4052 | case ARM::VMOVv2f32: | |
4053 | case ARM::VMOVv1i64: | |
4054 | UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); | |
4055 | break; | |
4056 | ||
4057 | // Explicitly reads the dependency. | |
4058 | case ARM::VLD1LNd32: | |
4059 | UseOp = 1; | |
4060 | break; | |
4061 | default: | |
4062 | return 0; | |
4063 | } | |
4064 | ||
4065 | // If this instruction actually reads a value from Reg, there is no unwanted | |
4066 | // dependency. | |
4067 | if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) | |
4068 | return 0; | |
4069 | ||
4070 | // We must be able to clobber the whole D-reg. | |
4071 | if (TargetRegisterInfo::isVirtualRegister(Reg)) { | |
4072 | // Virtual register must be a foo:ssub_0<def,undef> operand. | |
4073 | if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) | |
4074 | return 0; | |
4075 | } else if (ARM::SPRRegClass.contains(Reg)) { | |
4076 | // Physical register: MI must define the full D-reg. | |
4077 | unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, | |
4078 | &ARM::DPRRegClass); | |
4079 | if (!DReg || !MI->definesRegister(DReg, TRI)) | |
4080 | return 0; | |
4081 | } | |
4082 | ||
4083 | // MI has an unwanted D-register dependency. | |
4084 | // Avoid defs in the previous N instructrions. | |
4085 | return SwiftPartialUpdateClearance; | |
4086 | } | |
4087 | ||
4088 | // Break a partial register dependency after getPartialRegUpdateClearance | |
4089 | // returned non-zero. | |
4090 | void ARMBaseInstrInfo:: | |
4091 | breakPartialRegDependency(MachineBasicBlock::iterator MI, | |
4092 | unsigned OpNum, | |
4093 | const TargetRegisterInfo *TRI) const { | |
4094 | assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); | |
4095 | assert(TRI && "Need TRI instance"); | |
4096 | ||
4097 | const MachineOperand &MO = MI->getOperand(OpNum); | |
4098 | unsigned Reg = MO.getReg(); | |
4099 | assert(TargetRegisterInfo::isPhysicalRegister(Reg) && | |
4100 | "Can't break virtual register dependencies."); | |
4101 | unsigned DReg = Reg; | |
4102 | ||
4103 | // If MI defines an S-reg, find the corresponding D super-register. | |
4104 | if (ARM::SPRRegClass.contains(Reg)) { | |
4105 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; | |
4106 | assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); | |
4107 | } | |
4108 | ||
4109 | assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); | |
4110 | assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); | |
4111 | ||
4112 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines | |
4113 | // the full D-register by loading the same value to both lanes. The | |
4114 | // instruction is micro-coded with 2 uops, so don't do this until we can | |
4115 | // properly schedule micro-coded instuctions. The dispatcher stalls cause | |
4116 | // too big regressions. | |
4117 | ||
4118 | // Insert the dependency-breaking FCONSTD before MI. | |
4119 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. | |
4120 | AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), | |
4121 | get(ARM::FCONSTD), DReg).addImm(96)); | |
4122 | MI->addRegisterKilled(DReg, TRI, true); | |
4123 | } | |
4124 | ||
4125 | bool ARMBaseInstrInfo::hasNOP() const { | |
4126 | return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; | |
4127 | } |