]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements a wrapper around MCSchedModel that allows the interface | |
11 | // to benefit from information currently only available in TargetInstrInfo. | |
12 | // | |
13 | //===----------------------------------------------------------------------===// | |
14 | ||
15 | #include "llvm/CodeGen/TargetSchedule.h" | |
970d7e83 LB |
16 | #include "llvm/Support/CommandLine.h" |
17 | #include "llvm/Support/raw_ostream.h" | |
223e47cc LB |
18 | #include "llvm/Target/TargetInstrInfo.h" |
19 | #include "llvm/Target/TargetRegisterInfo.h" | |
20 | #include "llvm/Target/TargetSubtargetInfo.h" | |
223e47cc LB |
21 | |
22 | using namespace llvm; | |
23 | ||
970d7e83 | 24 | static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), |
223e47cc LB |
25 | cl::desc("Use TargetSchedModel for latency lookup")); |
26 | ||
27 | static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), | |
28 | cl::desc("Use InstrItineraryData for latency lookup")); | |
29 | ||
970d7e83 LB |
30 | bool TargetSchedModel::hasInstrSchedModel() const { |
31 | return EnableSchedModel && SchedModel.hasInstrSchedModel(); | |
32 | } | |
33 | ||
34 | bool TargetSchedModel::hasInstrItineraries() const { | |
35 | return EnableSchedItins && !InstrItins.isEmpty(); | |
36 | } | |
37 | ||
/// Compute the greatest common divisor of \p Dividend and \p Divisor with
/// Euclid's algorithm. The operands swap roles naturally each iteration, so
/// the arguments may be given in either order.
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  while (Divisor != 0) {
    unsigned Next = Dividend % Divisor;
    Dividend = Divisor;
    Divisor = Next;
  }
  return Dividend;
}

/// Compute the least common multiple of \p A and \p B. The product is formed
/// in 64 bits to avoid intermediate overflow; the final value must still fit
/// in unsigned, which the assert checks.
static unsigned lcm(unsigned A, unsigned B) {
  uint64_t Product = uint64_t(A) * B;
  unsigned LCM = Product / gcd(A, B);
  assert((LCM >= A && LCM >= B) && "LCM overflow");
  return LCM;
}
52 | ||
223e47cc LB |
53 | void TargetSchedModel::init(const MCSchedModel &sm, |
54 | const TargetSubtargetInfo *sti, | |
55 | const TargetInstrInfo *tii) { | |
56 | SchedModel = sm; | |
57 | STI = sti; | |
58 | TII = tii; | |
59 | STI->initInstrItins(InstrItins); | |
970d7e83 LB |
60 | |
61 | unsigned NumRes = SchedModel.getNumProcResourceKinds(); | |
62 | ResourceFactors.resize(NumRes); | |
63 | ResourceLCM = SchedModel.IssueWidth; | |
64 | for (unsigned Idx = 0; Idx < NumRes; ++Idx) { | |
65 | unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; | |
66 | if (NumUnits > 0) | |
67 | ResourceLCM = lcm(ResourceLCM, NumUnits); | |
68 | } | |
69 | MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; | |
70 | for (unsigned Idx = 0; Idx < NumRes; ++Idx) { | |
71 | unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; | |
72 | ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; | |
73 | } | |
74 | } | |
75 | ||
76 | unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, | |
77 | const MCSchedClassDesc *SC) const { | |
78 | if (hasInstrItineraries()) { | |
79 | int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); | |
80 | return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); | |
81 | } | |
82 | if (hasInstrSchedModel()) { | |
83 | if (!SC) | |
84 | SC = resolveSchedClass(MI); | |
85 | if (SC->isValid()) | |
86 | return SC->NumMicroOps; | |
87 | } | |
88 | return MI->isTransient() ? 0 : 1; | |
89 | } | |
90 | ||
// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  if (Cycles < 0)
    return 1000;
  return Cycles;
}
98 | ||
223e47cc LB |
/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {

  // Get the definition's scheduling class descriptor from this machine model.
  unsigned SchedClass = MI->getDesc().getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  // An invalid descriptor carries no variant data to resolve; return it
  // unchanged so the caller can test isValid() itself.
  if (!SCDesc->isValid())
    return SCDesc;

#ifndef NDEBUG
  // Debug-only guard against a cycle or runaway nesting of variant classes;
  // the bound of 6 is an arbitrary "magic number" (see the assert text).
  unsigned NIter = 0;
#endif
  // Variant classes resolve iteratively: the subtarget maps the current class
  // (given the concrete instruction) to the next one until a non-variant
  // descriptor is reached.
  while (SCDesc->isVariant()) {
    assert(++NIter < 6 && "Variants are nested deeper than the magic number");

    SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
    SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  }
  return SCDesc;
}
121 | ||
122 | /// Find the def index of this operand. This index maps to the machine model and | |
123 | /// is independent of use operands. Def operands may be reordered with uses or | |
124 | /// merged with uses without affecting the def index (e.g. before/after | |
125 | /// regalloc). However, an instruction's def operands must never be reordered | |
126 | /// with respect to each other. | |
127 | static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { | |
128 | unsigned DefIdx = 0; | |
129 | for (unsigned i = 0; i != DefOperIdx; ++i) { | |
130 | const MachineOperand &MO = MI->getOperand(i); | |
131 | if (MO.isReg() && MO.isDef()) | |
132 | ++DefIdx; | |
133 | } | |
134 | return DefIdx; | |
135 | } | |
136 | ||
137 | /// Find the use index of this operand. This is independent of the instruction's | |
138 | /// def operands. | |
139 | /// | |
140 | /// Note that uses are not determined by the operand's isUse property, which | |
141 | /// is simply the inverse of isDef. Here we consider any readsReg operand to be | |
142 | /// a "use". The machine model allows an operand to be both a Def and Use. | |
143 | static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { | |
144 | unsigned UseIdx = 0; | |
145 | for (unsigned i = 0; i != UseOperIdx; ++i) { | |
146 | const MachineOperand &MO = MI->getOperand(i); | |
147 | if (MO.isReg() && MO.readsReg()) | |
148 | ++UseIdx; | |
149 | } | |
150 | return UseIdx; | |
151 | } | |
152 | ||
// Top-level API for clients that know the operand indices.
//
// Computes the latency from DefMI's def operand DefOperIdx to the use operand
// UseOperIdx of UseMI. UseMI may be null, in which case only the write
// latency of the def is returned. Falls through three strategies in order:
// no model at all, itineraries, then the per-operand machine model.
unsigned TargetSchedModel::computeOperandLatency(
  const MachineInstr *DefMI, unsigned DefOperIdx,
  const MachineInstr *UseMI, unsigned UseOperIdx) const {

  // Neither model is available: use the target's default def latency.
  if (!hasInstrSchedModel() && !hasInstrItineraries())
    return TII->defaultDefLatency(SchedModel, DefMI);

  if (hasInstrItineraries()) {
    int OperLatency = 0;
    if (UseMI) {
      // With a known use, the target hook can account for both ends of the
      // dependence.
      OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx,
                                           UseMI, UseOperIdx);
    }
    else {
      // No use instruction: read the def operand's cycle straight from the
      // itinerary for DefMI's scheduling class.
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
    }
    // A negative value means the itinerary has no per-operand entry.
    if (OperLatency >= 0)
      return OperLatency;

    // No operand latency was found.
    unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);

    // Expected latency is the max of the stage latency and itinerary props.
    // Rather than directly querying InstrItins stage latency, we call a TII
    // hook to allow subtargets to specialize latency. This hook is only
    // applicable to the InstrItins model. InstrSchedModel should model all
    // special cases without TII hooks.
    InstrLatency = std::max(InstrLatency,
                            TII->defaultDefLatency(SchedModel, DefMI));
    return InstrLatency;
  }
  // hasInstrSchedModel()
  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned WriteID = WLEntry->WriteResourceID;
    // capLatency converts a negative (invalid/infinite) cycle count into a
    // large finite value.
    unsigned Latency = capLatency(WLEntry->Cycles);
    if (!UseMI)
      return Latency;

    // Lookup the use's latency adjustment in SubtargetInfo.
    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
    if (UseDesc->NumReadAdvanceEntries == 0)
      return Latency;
    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
    // ReadAdvance can exceed the write latency; clamp at zero rather than
    // letting the unsigned subtraction wrap.
    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
      return 0;
    return Latency - Advance;
  }
  // If DefIdx does not exist in the model (e.g. implicit defs), then return
  // unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
  // In debug builds, a missing write entry for an explicit, non-optional def
  // in a complete model is a model bug worth dying loudly over.
  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
      && SchedModel.isComplete()) {
    std::string Err;
    raw_string_ostream ss(Err);
    ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
       << *DefMI;
    report_fatal_error(ss.str());
  }
#endif
  // FIXME: Automatically giving all implicit defs defaultDefLatency is
  // undesirable. We should only do it for defs that are known to the MC
  // desc like flags. Truly implicit defs should get 1 cycle latency.
  return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI);
}
226 | ||
/// Compute the model-defined latency for an opcode with no MachineInstr in
/// hand. Only usable when the scheduling class is non-variant, since variant
/// classes need a concrete instruction to resolve.
unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
  assert(hasInstrSchedModel() && "Only call this function with a SchedModel");

  unsigned SCIdx = TII->get(Opcode).getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
  unsigned Latency = 0;

  if (SCDesc->isValid() && !SCDesc->isVariant()) {
    // Instruction latency is the max over the write latencies of its defs.
    for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
         DefIdx != DefEnd; ++DefIdx) {
      // Lookup the definition's write latency in SubtargetInfo.
      const MCWriteLatencyEntry *WLEntry =
        STI->getWriteLatencyEntry(SCDesc, DefIdx);
      Latency = std::max(Latency, capLatency(WLEntry->Cycles));
    }
    return Latency;
  }

  // Latency is still 0 here, so this assert always fires when the class is
  // invalid or variant: reaching this point is a hard error in debug builds,
  // and release builds fall back to 0.
  assert(Latency && "No MI sched latency");
  return 0;
}
248 | ||
1a4d82fc JJ |
249 | unsigned |
250 | TargetSchedModel::computeInstrLatency(const MachineInstr *MI, | |
251 | bool UseDefaultDefLatency) const { | |
970d7e83 LB |
252 | // For the itinerary model, fall back to the old subtarget hook. |
253 | // Allow subtargets to compute Bundle latencies outside the machine model. | |
1a4d82fc JJ |
254 | if (hasInstrItineraries() || MI->isBundle() || |
255 | (!hasInstrSchedModel() && !UseDefaultDefLatency)) | |
970d7e83 LB |
256 | return TII->getInstrLatency(&InstrItins, MI); |
257 | ||
258 | if (hasInstrSchedModel()) { | |
259 | const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); | |
260 | if (SCDesc->isValid()) { | |
261 | unsigned Latency = 0; | |
262 | for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; | |
263 | DefIdx != DefEnd; ++DefIdx) { | |
264 | // Lookup the definition's write latency in SubtargetInfo. | |
265 | const MCWriteLatencyEntry *WLEntry = | |
266 | STI->getWriteLatencyEntry(SCDesc, DefIdx); | |
1a4d82fc | 267 | Latency = std::max(Latency, capLatency(WLEntry->Cycles)); |
970d7e83 LB |
268 | } |
269 | return Latency; | |
270 | } | |
223e47cc | 271 | } |
1a4d82fc | 272 | return TII->defaultDefLatency(SchedModel, MI); |
970d7e83 LB |
273 | } |
274 | ||
/// Compute the latency of a WAW (output) dependency: \p DefMI defines the
/// register in operand \p DefOperIdx, and \p DepMI writes it again.
unsigned TargetSchedModel::
computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
                     const MachineInstr *DepMI) const {
  // A tiny or absent micro-op buffer models an in-order core: the second
  // write must wait one cycle behind the first.
  if (SchedModel.MicroOpBufferSize <= 1)
    return 1;

  // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch
  // WAW dependencies in the same cycle.

  // Treat predication as a data dependency for out-of-order cpus. In-order
  // cpus do not need to treat predicated writes specially.
  //
  // TODO: The following hack exists because predication passes do not
  // correctly append imp-use operands, and readsReg() strangely returns false
  // for predicated defs.
  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
  const MachineFunction &MF = *DefMI->getParent()->getParent();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // If DepMI is predicated but does not read Reg, model the edge with DefMI's
  // full instruction latency, as if it were a true data dependence.
  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
    return computeInstrLatency(DefMI);

  // If we have a per operand scheduling model, check if this def is writing
  // an unbuffered resource. If so, it treated like an in-order cpu.
  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
    if (SCDesc->isValid()) {
      for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
             *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
        // BufferSize == 0 marks an unbuffered (in-order) resource; the WAW
        // edge then costs one cycle.
        if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
          return 1;
      }
    }
  }
  // Fully buffered out-of-order dispatch: WAW costs nothing.
  return 0;
}