]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file defines the machine model for the ARM Cortex A53 processors. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | // ===---------------------------------------------------------------------===// | |
15 | // The following definitions describe the simpler per-operand machine model. | |
16 | // This works with MachineScheduler. See MCSchedModel.h for details. | |
17 | ||
18 | // Cortex-A53 machine model for scheduling and other instruction cost heuristics. | |
19 | def CortexA53Model : SchedMachineModel { | |
20 | let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order. | |
21 | let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. | |
22 | let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency. | |
23 | let LoadLatency = 3; // Optimistic load latency assuming bypass. | |
24 | // This is overridden by OperandCycles if the | |
25 | // Itineraries are queried instead. | |
26 | let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation | |
27 | // Specification - Instruction Timings" | |
28 | // v 1.0 Spreadsheet | |
29 | } | |
30 | ||
31 | ||
32 | //===----------------------------------------------------------------------===// | |
33 | // Define each kind of processor resource and number available. | |
34 | ||
35 | // Modeling each pipeline as a ProcResource using the BufferSize = 0 since | |
36 | // Cortex-A53 is in-order. | |
37 | ||
38 | def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU | |
39 | def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC | |
40 | def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division | |
41 | def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store | |
42 | def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch | |
43 | def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU | |
44 | def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt | |
45 | ||
46 | ||
47 | //===----------------------------------------------------------------------===// | |
48 | // Subtarget-specific SchedWrite types which both map the ProcResources and | |
49 | // set the latency. | |
50 | ||
51 | let SchedModel = CortexA53Model in { | |
52 | ||
53 | // ALU - Despite having a full latency of 4, most of the ALU instructions can | |
54 | // forward a cycle earlier and then two cycles earlier in the case of a | |
55 | // shift-only instruction. These latencies will be incorrect when the | |
56 | // result cannot be forwarded, but modeling isn't rocket surgery. | |
57 | def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 3; } | |
58 | def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 3; } | |
59 | def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 3; } | |
60 | def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 3; } | |
61 | def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 2; } | |
62 | def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 3; } | |
63 | ||
64 | // MAC | |
65 | def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; } | |
66 | def : WriteRes<WriteIM64, [A53UnitMAC]> { let Latency = 4; } | |
67 | ||
68 | // Div | |
69 | def : WriteRes<WriteID32, [A53UnitDiv]> { let Latency = 4; } | |
70 | def : WriteRes<WriteID64, [A53UnitDiv]> { let Latency = 4; } | |
71 | ||
72 | // Load | |
73 | def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; } | |
74 | def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; } | |
75 | def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; } | |
76 | ||
77 | // Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD | |
78 | // below, choosing the median of 3 which makes the latency 6. | |
79 | // May model this more carefully in the future. The remaining | |
80 | // A53WriteVLD# types represent the 1-5 cycle issues explicitly. | |
81 | def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6; | |
82 | let ResourceCycles = [3]; } | |
83 | def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } | |
84 | def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; | |
85 | let ResourceCycles = [2]; } | |
86 | def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; | |
87 | let ResourceCycles = [3]; } | |
88 | def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; | |
89 | let ResourceCycles = [4]; } | |
90 | def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8; | |
91 | let ResourceCycles = [5]; } | |
92 | ||
93 | // Pre/Post Indexing - Performed as part of address generation which is already | |
94 | // accounted for in the WriteST* latencies below | |
95 | def : WriteRes<WriteAdr, []> { let Latency = 0; } | |
96 | ||
97 | // Store | |
98 | def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; } | |
99 | def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; } | |
100 | def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; } | |
101 | def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; } | |
102 | ||
103 | // Vector Store - Similar to vector loads, can take 1-3 cycles to issue. | |
104 | def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5; | |
105 | let ResourceCycles = [2];} | |
106 | def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } | |
107 | def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; | |
108 | let ResourceCycles = [2]; } | |
109 | def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; | |
110 | let ResourceCycles = [3]; } | |
111 | ||
112 | // Branch | |
113 | def : WriteRes<WriteBr, [A53UnitB]>; | |
114 | def : WriteRes<WriteBrReg, [A53UnitB]>; | |
115 | def : WriteRes<WriteSys, [A53UnitB]>; | |
116 | def : WriteRes<WriteBarrier, [A53UnitB]>; | |
117 | def : WriteRes<WriteHint, [A53UnitB]>; | |
118 | ||
119 | // FP ALU | |
120 | def : WriteRes<WriteF, [A53UnitFPALU]> { let Latency = 6; } | |
121 | def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; } | |
122 | def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; } | |
123 | def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; } | |
124 | def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; } | |
125 | def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; } | |
126 | ||
127 | // FP Mul, Div, Sqrt | |
128 | def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; } | |
129 | def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33; | |
130 | let ResourceCycles = [29]; } | |
131 | def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; } | |
132 | def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18; | |
133 | let ResourceCycles = [14]; } | |
134 | def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33; | |
135 | let ResourceCycles = [29]; } | |
136 | def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17; | |
137 | let ResourceCycles = [13]; } | |
138 | def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32; | |
139 | let ResourceCycles = [28]; } | |
140 | ||
141 | //===----------------------------------------------------------------------===// | |
142 | // Subtarget-specific SchedRead types. | |
143 | ||
144 | // No forwarding for these reads. | |
145 | def : ReadAdvance<ReadExtrHi, 0>; | |
146 | def : ReadAdvance<ReadAdrBase, 0>; | |
147 | def : ReadAdvance<ReadVLD, 0>; | |
148 | ||
149 | // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable | |
150 | // operands are needed one cycle later if and only if they are to be | |
151 | // shifted. Otherwise, they too are needed two cycles later. This same | |
152 | // ReadAdvance applies to Extended registers as well, even though there is | |
153 | // a separate SchedPredicate for them. | |
154 | def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, | |
155 | WriteISReg, WriteIEReg,WriteIS, | |
156 | WriteID32,WriteID64, | |
157 | WriteIM32,WriteIM64]>; | |
158 | def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, | |
159 | WriteISReg, WriteIEReg,WriteIS, | |
160 | WriteID32,WriteID64, | |
161 | WriteIM32,WriteIM64]>; | |
162 | def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, | |
163 | WriteISReg, WriteIEReg,WriteIS, | |
164 | WriteID32,WriteID64, | |
165 | WriteIM32,WriteIM64]>; | |
166 | def A53ReadISReg : SchedReadVariant<[ | |
167 | SchedVar<RegShiftedPred, [A53ReadShifted]>, | |
168 | SchedVar<NoSchedPred, [A53ReadNotShifted]>]>; | |
169 | def : SchedAlias<ReadISReg, A53ReadISReg>; | |
170 | ||
171 | def A53ReadIEReg : SchedReadVariant<[ | |
172 | SchedVar<RegExtendedPred, [A53ReadShifted]>, | |
173 | SchedVar<NoSchedPred, [A53ReadNotShifted]>]>; | |
174 | def : SchedAlias<ReadIEReg, A53ReadIEReg>; | |
175 | ||
176 | // MAC - Operands are generally needed one cycle later in the MAC pipe. | |
177 | // Accumulator operands are needed two cycles later. | |
178 | def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI, | |
179 | WriteISReg, WriteIEReg,WriteIS, | |
180 | WriteID32,WriteID64, | |
181 | WriteIM32,WriteIM64]>; | |
182 | def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI, | |
183 | WriteISReg, WriteIEReg,WriteIS, | |
184 | WriteID32,WriteID64, | |
185 | WriteIM32,WriteIM64]>; | |
186 | ||
187 | // Div | |
188 | def : ReadAdvance<ReadID, 1, [WriteImm,WriteI, | |
189 | WriteISReg, WriteIEReg,WriteIS, | |
190 | WriteID32,WriteID64, | |
191 | WriteIM32,WriteIM64]>; | |
192 | ||
193 | //===----------------------------------------------------------------------===// | |
194 | // Subtarget-specific InstRWs. | |
195 | ||
196 | //--- | |
197 | // Miscellaneous | |
198 | //--- | |
199 | def : InstRW<[WriteI], (instrs COPY)>; | |
200 | ||
201 | //--- | |
202 | // Vector Loads | |
203 | //--- | |
204 | def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; | |
205 | def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
206 | def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
207 | def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
208 | def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
209 | def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
210 | def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; | |
211 | def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
212 | def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
213 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
214 | def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
215 | def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
216 | ||
217 | def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; | |
218 | def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
219 | def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; | |
220 | def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; | |
221 | def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; | |
222 | def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
223 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; | |
224 | def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; | |
225 | ||
226 | def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; | |
227 | def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
228 | def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; | |
229 | def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>; | |
230 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; | |
231 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
232 | def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; | |
233 | def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; | |
234 | ||
235 | def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; | |
236 | def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
237 | def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; | |
238 | def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; | |
239 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; | |
240 | def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
241 | def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; | |
242 | def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; | |
243 | ||
244 | //--- | |
245 | // Vector Stores | |
246 | //--- | |
247 | def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; | |
248 | def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
249 | def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
250 | def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
251 | def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; | |
252 | def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; | |
253 | def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
254 | def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
255 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
256 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; | |
257 | ||
258 | def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; | |
259 | def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; | |
260 | def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; | |
261 | def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; | |
262 | def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; | |
263 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; | |
264 | ||
265 | def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; | |
266 | def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; | |
267 | def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; | |
268 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; | |
269 | def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; | |
270 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; | |
271 | ||
272 | def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; | |
273 | def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; | |
274 | def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; | |
275 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; | |
276 | def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; | |
277 | def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; | |
278 | ||
279 | //--- | |
280 | // Floating Point MAC, DIV, SQRT | |
281 | //--- | |
282 | def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; | |
283 | def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>; | |
284 | def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>; | |
285 | def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>; | |
286 | def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>; | |
287 | def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>; | |
288 | def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; | |
289 | def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; | |
290 | ||
291 | } |