]> git.proxmox.com Git - rustc.git/blame - src/llvm/lib/Target/ARM/ARMScheduleA9.td
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / llvm / lib / Target / ARM / ARMScheduleA9.td
CommitLineData
223e47cc
LB
1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A9 processors.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// This section contains legacy support for itineraries. This is
16// required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18//
19// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
20// Technical Reference Manual".
21//
22// Functional units
23def A9_Issue0 : FuncUnit; // Issue 0; first stage of the itineraries below (Issue0 or Issue1)
24def A9_Issue1 : FuncUnit; // Issue 1; alternative to Issue 0 (IIC_Br takes both)
25def A9_Branch : FuncUnit; // Branch; used by IIC_Br and the load-multiple/pop + branch classes
26def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0; the integer multiply classes use only this ALU
27def A9_ALU1 : FuncUnit; // ALU pipeline 1; interchangeable with ALU0 for non-multiply integer ops
28def A9_AGU : FuncUnit; // Address generation unit for ld / st
29def A9_NPipe : FuncUnit; // NEON pipeline; the VFP itineraries below execute here too
30def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer; acquired by load/store and VFP/NEON classes
31def A9_LSUnit : FuncUnit; // L/S Unit
32def A9_DRegsVFP: FuncUnit; // FP register set, VFP side; artificial FU: Required by VFP, Reserved by NEON
33def A9_DRegsN : FuncUnit; // FP register set, NEON side; artificial FU: Required by NEON, Reserved by VFP
34
35// Bypasses
36def A9_LdBypass : Bypass; // Named on load itineraries and on ALU / MVN / CMP operand lists below
37
38def CortexA9Itineraries : ProcessorItineraries<
39 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
40 A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
41 [A9_LdBypass], [
42 // Two fully-pipelined integer ALU pipelines
43
44 //
45 // Move instructions, unconditional
46 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
48 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
52 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
53 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
54 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
55 InstrStage<1, [A9_ALU0, A9_ALU1]>,
56 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
57 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58 InstrStage<1, [A9_ALU0, A9_ALU1]>,
59 InstrStage<1, [A9_ALU0, A9_ALU1]>,
60 InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
61 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62 InstrStage<1, [A9_ALU0, A9_ALU1]>,
63 InstrStage<1, [A9_ALU0, A9_ALU1]>,
64 InstrStage<1, [A9_MUX0], 0>,
65 InstrStage<1, [A9_AGU], 0>,
66 InstrStage<1, [A9_LSUnit]>], [5]>,
67 //
68 // MVN instructions
69 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70 InstrStage<1, [A9_ALU0, A9_ALU1]>],
71 [1]>,
72 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
73 InstrStage<1, [A9_ALU0, A9_ALU1]>],
74 [1, 1], [NoBypass, A9_LdBypass]>,
75 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
76 InstrStage<2, [A9_ALU0, A9_ALU1]>],
77 [2, 1]>,
78 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
79 InstrStage<3, [A9_ALU0, A9_ALU1]>],
80 [3, 1, 1]>,
81 //
82 // No operand cycles
83 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
85 //
86 // Binary Instructions that produce a result
87 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
88 InstrStage<1, [A9_ALU0, A9_ALU1]>],
89 [1, 1], [NoBypass, A9_LdBypass]>,
90 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
91 InstrStage<1, [A9_ALU0, A9_ALU1]>],
92 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
93 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94 InstrStage<2, [A9_ALU0, A9_ALU1]>],
95 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
96 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97 InstrStage<2, [A9_ALU0, A9_ALU1]>],
98 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
99 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
100 InstrStage<3, [A9_ALU0, A9_ALU1]>],
101 [3, 1, 1, 1],
102 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
103 //
104 // Bitwise Instructions that produce a result
105 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
109 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
110 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
111 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
112 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
113 //
114 // Unary Instructions that produce a result
115
116 // CLZ, RBIT, etc.
117 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119
120 // BFC, BFI, UBFX, SBFX
121 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
123
124 //
125 // Zero and sign extension instructions
126 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
128 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
130 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
132 //
133 // Compare instructions
134 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135 InstrStage<1, [A9_ALU0, A9_ALU1]>],
136 [1], [A9_LdBypass]>,
137 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138 InstrStage<1, [A9_ALU0, A9_ALU1]>],
139 [1, 1], [A9_LdBypass, A9_LdBypass]>,
140 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141 InstrStage<2, [A9_ALU0, A9_ALU1]>],
142 [1, 1], [A9_LdBypass, NoBypass]>,
143 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
144 InstrStage<3, [A9_ALU0, A9_ALU1]>],
145 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
146 //
147 // Test instructions
148 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
150 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
152 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
153 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
154 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
156 //
157 // Move instructions, conditional
158 // FIXME: Correctly model the extra input dep on the destination.
159 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
161 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
163 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
165 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
167 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168 InstrStage<1, [A9_ALU0, A9_ALU1]>,
169 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
170 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171
172 // Integer multiply pipeline
173 //
174 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
176 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
177 InstrStage<2, [A9_ALU0]>],
178 [3, 1, 1, 1]>,
179 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
181 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
182 InstrStage<2, [A9_ALU0]>],
183 [4, 1, 1, 1]>,
184 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
185 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
186 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187 InstrStage<3, [A9_ALU0]>],
188 [4, 5, 1, 1]>,
189 // Integer load pipeline
190 // FIXME: The timings are some rough approximations
191 //
192 // Immediate offset
193 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194 InstrStage<1, [A9_MUX0], 0>,
195 InstrStage<1, [A9_AGU], 0>,
196 InstrStage<1, [A9_LSUnit]>],
197 [3, 1], [A9_LdBypass]>,
198 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199 InstrStage<1, [A9_MUX0], 0>,
200 InstrStage<2, [A9_AGU], 0>,
201 InstrStage<1, [A9_LSUnit]>],
202 [4, 1], [A9_LdBypass]>,
203 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
204 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
205 InstrStage<1, [A9_MUX0], 0>,
206 InstrStage<2, [A9_AGU], 0>,
207 InstrStage<1, [A9_LSUnit]>],
208 [3, 3, 1], [A9_LdBypass]>,
209 //
210 // Register offset
211 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
212 InstrStage<1, [A9_MUX0], 0>,
213 InstrStage<1, [A9_AGU], 0>,
214 InstrStage<1, [A9_LSUnit]>],
215 [3, 1, 1], [A9_LdBypass]>,
216 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
217 InstrStage<1, [A9_MUX0], 0>,
218 InstrStage<2, [A9_AGU], 0>,
219 InstrStage<1, [A9_LSUnit]>],
220 [4, 1, 1], [A9_LdBypass]>,
221 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222 InstrStage<1, [A9_MUX0], 0>,
223 InstrStage<2, [A9_AGU], 0>,
224 InstrStage<1, [A9_LSUnit]>],
225 [3, 3, 1, 1], [A9_LdBypass]>,
226 //
227 // Scaled register offset
228 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
229 InstrStage<1, [A9_MUX0], 0>,
230 InstrStage<1, [A9_AGU], 0>,
231 InstrStage<1, [A9_LSUnit], 0>],
232 [4, 1, 1], [A9_LdBypass]>,
233 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234 InstrStage<1, [A9_MUX0], 0>,
235 InstrStage<2, [A9_AGU], 0>,
236 InstrStage<1, [A9_LSUnit]>],
237 [5, 1, 1], [A9_LdBypass]>,
238 //
239 // Immediate offset with update
240 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
241 InstrStage<1, [A9_MUX0], 0>,
242 InstrStage<1, [A9_AGU], 0>,
243 InstrStage<1, [A9_LSUnit]>],
244 [3, 2, 1], [A9_LdBypass]>,
245 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
246 InstrStage<1, [A9_MUX0], 0>,
247 InstrStage<2, [A9_AGU], 0>,
248 InstrStage<1, [A9_LSUnit]>],
249 [4, 3, 1], [A9_LdBypass]>,
250 //
251 // Register offset with update
252 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
253 InstrStage<1, [A9_MUX0], 0>,
254 InstrStage<1, [A9_AGU], 0>,
255 InstrStage<1, [A9_LSUnit]>],
256 [3, 2, 1, 1], [A9_LdBypass]>,
257 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
258 InstrStage<1, [A9_MUX0], 0>,
259 InstrStage<2, [A9_AGU], 0>,
260 InstrStage<1, [A9_LSUnit]>],
261 [4, 3, 1, 1], [A9_LdBypass]>,
262 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
263 InstrStage<1, [A9_MUX0], 0>,
264 InstrStage<2, [A9_AGU], 0>,
265 InstrStage<1, [A9_LSUnit]>],
266 [3, 3, 1, 1], [A9_LdBypass]>,
267 //
268 // Scaled register offset with update
269 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
270 InstrStage<1, [A9_MUX0], 0>,
271 InstrStage<1, [A9_AGU], 0>,
272 InstrStage<1, [A9_LSUnit]>],
273 [4, 3, 1, 1], [A9_LdBypass]>,
274 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
275 InstrStage<1, [A9_MUX0], 0>,
276 InstrStage<2, [A9_AGU], 0>,
277 InstrStage<1, [A9_LSUnit]>],
278 [5, 4, 1, 1], [A9_LdBypass]>,
279 //
280 // Load multiple, def is the 5th operand.
281 // FIXME: This assumes 3 to 4 registers.
282 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
283 InstrStage<1, [A9_MUX0], 0>,
284 InstrStage<2, [A9_AGU], 1>,
285 InstrStage<2, [A9_LSUnit]>],
286 [1, 1, 1, 1, 3],
287 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
288 -1>, // dynamic uops
289 //
290 // Load multiple + update, defs are the 1st and 5th operands.
291 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
292 InstrStage<1, [A9_MUX0], 0>,
293 InstrStage<2, [A9_AGU], 1>,
294 InstrStage<2, [A9_LSUnit]>],
295 [2, 1, 1, 1, 3],
296 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
297 -1>, // dynamic uops
298 //
299 // Load multiple plus branch
300 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
301 InstrStage<1, [A9_MUX0], 0>,
302 InstrStage<1, [A9_AGU], 1>,
303 InstrStage<2, [A9_LSUnit]>,
304 InstrStage<1, [A9_Branch]>],
305 [1, 2, 1, 1, 3],
306 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
307 -1>, // dynamic uops
308 //
309 // Pop, def is the 3rd operand.
310 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
311 InstrStage<1, [A9_MUX0], 0>,
312 InstrStage<2, [A9_AGU], 1>,
313 InstrStage<2, [A9_LSUnit]>],
314 [1, 1, 3],
315 [NoBypass, NoBypass, A9_LdBypass],
316 -1>, // dynamic uops
317 //
318 // Pop + branch, def is the 3rd operand.
319 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
320 InstrStage<1, [A9_MUX0], 0>,
321 InstrStage<2, [A9_AGU], 1>,
322 InstrStage<2, [A9_LSUnit]>,
323 InstrStage<1, [A9_Branch]>],
324 [1, 1, 3],
325 [NoBypass, NoBypass, A9_LdBypass],
326 -1>, // dynamic uops
327 //
328 // iLoadi + iALUr for t2LDRpci_pic.
329 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
330 InstrStage<1, [A9_MUX0], 0>,
331 InstrStage<1, [A9_AGU], 0>,
332 InstrStage<1, [A9_LSUnit]>,
333 InstrStage<1, [A9_ALU0, A9_ALU1]>],
334 [2, 1]>,
335
336 // Integer store pipeline
337 ///
338 // Immediate offset
339 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340 InstrStage<1, [A9_MUX0], 0>,
341 InstrStage<1, [A9_AGU], 0>,
342 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
343 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
344 InstrStage<1, [A9_MUX0], 0>,
345 InstrStage<2, [A9_AGU], 1>,
346 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
347 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
348 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
349 InstrStage<1, [A9_MUX0], 0>,
350 InstrStage<2, [A9_AGU], 1>,
351 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
352 //
353 // Register offset
354 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355 InstrStage<1, [A9_MUX0], 0>,
356 InstrStage<1, [A9_AGU], 0>,
357 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
359 InstrStage<1, [A9_MUX0], 0>,
360 InstrStage<2, [A9_AGU], 1>,
361 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
362 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
363 InstrStage<1, [A9_MUX0], 0>,
364 InstrStage<2, [A9_AGU], 1>,
365 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
366 //
367 // Scaled register offset
368 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
369 InstrStage<1, [A9_MUX0], 0>,
370 InstrStage<1, [A9_AGU], 0>,
371 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
372 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
373 InstrStage<1, [A9_MUX0], 0>,
374 InstrStage<2, [A9_AGU], 1>,
375 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
376 //
377 // Immediate offset with update
378 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
379 InstrStage<1, [A9_MUX0], 0>,
380 InstrStage<1, [A9_AGU], 0>,
381 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
382 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383 InstrStage<1, [A9_MUX0], 0>,
384 InstrStage<2, [A9_AGU], 1>,
385 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
386 //
387 // Register offset with update
388 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389 InstrStage<1, [A9_MUX0], 0>,
390 InstrStage<1, [A9_AGU], 0>,
391 InstrStage<1, [A9_LSUnit]>],
392 [2, 1, 1, 1]>,
393 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
394 InstrStage<1, [A9_MUX0], 0>,
395 InstrStage<2, [A9_AGU], 1>,
396 InstrStage<1, [A9_LSUnit]>],
397 [3, 1, 1, 1]>,
398 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
399 InstrStage<1, [A9_MUX0], 0>,
400 InstrStage<2, [A9_AGU], 1>,
401 InstrStage<1, [A9_LSUnit]>],
402 [3, 1, 1, 1]>,
403 //
404 // Scaled register offset with update
405 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
406 InstrStage<1, [A9_MUX0], 0>,
407 InstrStage<1, [A9_AGU], 0>,
408 InstrStage<1, [A9_LSUnit]>],
409 [2, 1, 1, 1]>,
410 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
411 InstrStage<1, [A9_MUX0], 0>,
412 InstrStage<2, [A9_AGU], 1>,
413 InstrStage<1, [A9_LSUnit]>],
414 [3, 1, 1, 1]>,
415 //
416 // Store multiple
417 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
418 InstrStage<1, [A9_MUX0], 0>,
419 InstrStage<1, [A9_AGU], 0>,
420 InstrStage<2, [A9_LSUnit]>],
421 [], [], -1>, // dynamic uops
422 //
423 // Store multiple + update
424 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
425 InstrStage<1, [A9_MUX0], 0>,
426 InstrStage<1, [A9_AGU], 0>,
427 InstrStage<2, [A9_LSUnit]>],
428 [2], [], -1>, // dynamic uops
429 //
430 // Preload
431 InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
432
433 // Branch
434 //
435 // no delay slots, so the latency of a branch is unimportant
436 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
437 InstrStage<1, [A9_Issue1], 0>,
438 InstrStage<1, [A9_Branch]>]>,
439
440 // VFP and NEON share the same register file. This means that every VFP
441 // instruction should wait for full completion of the consecutive NEON
442 // instruction and vice-versa. We model this behavior with two artificial FUs:
443 // DRegsVFP and DRegsN.
444 //
445 // Every VFP instruction:
446 // - Acquires DRegsVFP resource for 1 cycle
447 // - Reserves DRegsN resource for the whole duration (including time to
448 // register file writeback!).
449 // Every NEON instruction does the same but with FUs swapped.
450 //
451 // Since the reserved FU cannot be acquired, this models precisely
452 // "cross-domain" stalls.
453
454 // VFP
455 // Issue through integer pipeline, and execute in NEON unit.
456
457 // FP Special Register to Integer Register File Move
458 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
459 InstrStage<1, [A9_MUX0], 0>,
460 InstrStage<1, [A9_DRegsVFP], 0, Required>,
461 InstrStage<2, [A9_DRegsN], 0, Reserved>,
462 InstrStage<1, [A9_NPipe]>],
463 [1]>,
464 //
465 // Single-precision FP Unary
466 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467 InstrStage<1, [A9_MUX0], 0>,
468 InstrStage<1, [A9_DRegsVFP], 0, Required>,
469 // Extra latency cycles since wbck is 2 cycles
470 InstrStage<3, [A9_DRegsN], 0, Reserved>,
471 InstrStage<1, [A9_NPipe]>],
472 [1, 1]>,
473 //
474 // Double-precision FP Unary
475 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
476 InstrStage<1, [A9_MUX0], 0>,
477 InstrStage<1, [A9_DRegsVFP], 0, Required>,
478 // Extra latency cycles since wbck is 2 cycles
479 InstrStage<3, [A9_DRegsN], 0, Reserved>,
480 InstrStage<1, [A9_NPipe]>],
481 [1, 1]>,
482
483 //
484 // Single-precision FP Compare
485 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486 InstrStage<1, [A9_MUX0], 0>,
487 InstrStage<1, [A9_DRegsVFP], 0, Required>,
488 // Extra latency cycles since wbck is 4 cycles
489 InstrStage<5, [A9_DRegsN], 0, Reserved>,
490 InstrStage<1, [A9_NPipe]>],
491 [1, 1]>,
492 //
493 // Double-precision FP Compare
494 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<1, [A9_DRegsVFP], 0, Required>,
497 // Extra latency cycles since wbck is 4 cycles
498 InstrStage<5, [A9_DRegsN], 0, Reserved>,
499 InstrStage<1, [A9_NPipe]>],
500 [1, 1]>,
501 //
502 // Single to Double FP Convert
503 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
504 InstrStage<1, [A9_MUX0], 0>,
505 InstrStage<1, [A9_DRegsVFP], 0, Required>,
506 InstrStage<5, [A9_DRegsN], 0, Reserved>,
507 InstrStage<1, [A9_NPipe]>],
508 [4, 1]>,
509 //
510 // Double to Single FP Convert
511 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512 InstrStage<1, [A9_MUX0], 0>,
513 InstrStage<1, [A9_DRegsVFP], 0, Required>,
514 InstrStage<5, [A9_DRegsN], 0, Reserved>,
515 InstrStage<1, [A9_NPipe]>],
516 [4, 1]>,
517
518 //
519 // Single to Half FP Convert
520 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
521 InstrStage<1, [A9_MUX0], 0>,
522 InstrStage<1, [A9_DRegsVFP], 0, Required>,
523 InstrStage<5, [A9_DRegsN], 0, Reserved>,
524 InstrStage<1, [A9_NPipe]>],
525 [4, 1]>,
526 //
527 // Half to Single FP Convert
528 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529 InstrStage<1, [A9_MUX0], 0>,
530 InstrStage<1, [A9_DRegsVFP], 0, Required>,
531 InstrStage<3, [A9_DRegsN], 0, Reserved>,
532 InstrStage<1, [A9_NPipe]>],
533 [2, 1]>,
534
535 //
536 // Single-Precision FP to Integer Convert
537 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
538 InstrStage<1, [A9_MUX0], 0>,
539 InstrStage<1, [A9_DRegsVFP], 0, Required>,
540 InstrStage<5, [A9_DRegsN], 0, Reserved>,
541 InstrStage<1, [A9_NPipe]>],
542 [4, 1]>,
543 //
544 // Double-Precision FP to Integer Convert
545 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
546 InstrStage<1, [A9_MUX0], 0>,
547 InstrStage<1, [A9_DRegsVFP], 0, Required>,
548 InstrStage<5, [A9_DRegsN], 0, Reserved>,
549 InstrStage<1, [A9_NPipe]>],
550 [4, 1]>,
551 //
552 // Integer to Single-Precision FP Convert
553 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
554 InstrStage<1, [A9_MUX0], 0>,
555 InstrStage<1, [A9_DRegsVFP], 0, Required>,
556 InstrStage<5, [A9_DRegsN], 0, Reserved>,
557 InstrStage<1, [A9_NPipe]>],
558 [4, 1]>,
559 //
560 // Integer to Double-Precision FP Convert
561 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
562 InstrStage<1, [A9_MUX0], 0>,
563 InstrStage<1, [A9_DRegsVFP], 0, Required>,
564 InstrStage<5, [A9_DRegsN], 0, Reserved>,
565 InstrStage<1, [A9_NPipe]>],
566 [4, 1]>,
567 //
568 // Single-precision FP ALU
569 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
570 InstrStage<1, [A9_MUX0], 0>,
571 InstrStage<1, [A9_DRegsVFP], 0, Required>,
572 InstrStage<5, [A9_DRegsN], 0, Reserved>,
573 InstrStage<1, [A9_NPipe]>],
574 [4, 1, 1]>,
575 //
576 // Double-precision FP ALU
577 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
578 InstrStage<1, [A9_MUX0], 0>,
579 InstrStage<1, [A9_DRegsVFP], 0, Required>,
580 InstrStage<5, [A9_DRegsN], 0, Reserved>,
581 InstrStage<1, [A9_NPipe]>],
582 [4, 1, 1]>,
583 //
584 // Single-precision FP Multiply
585 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
586 InstrStage<1, [A9_MUX0], 0>,
587 InstrStage<1, [A9_DRegsVFP], 0, Required>,
588 InstrStage<6, [A9_DRegsN], 0, Reserved>,
589 InstrStage<1, [A9_NPipe]>],
590 [5, 1, 1]>,
591 //
592 // Double-precision FP Multiply
593 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
594 InstrStage<1, [A9_MUX0], 0>,
595 InstrStage<1, [A9_DRegsVFP], 0, Required>,
596 InstrStage<7, [A9_DRegsN], 0, Reserved>,
597 InstrStage<2, [A9_NPipe]>],
598 [6, 1, 1]>,
599 //
600 // Single-precision FP MAC
601 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
602 InstrStage<1, [A9_MUX0], 0>,
603 InstrStage<1, [A9_DRegsVFP], 0, Required>,
604 InstrStage<9, [A9_DRegsN], 0, Reserved>,
605 InstrStage<1, [A9_NPipe]>],
606 [8, 1, 1, 1]>,
607 //
608 // Double-precision FP MAC
609 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
610 InstrStage<1, [A9_MUX0], 0>,
611 InstrStage<1, [A9_DRegsVFP], 0, Required>,
612 InstrStage<10, [A9_DRegsN], 0, Reserved>,
613 InstrStage<2, [A9_NPipe]>],
614 [9, 1, 1, 1]>,
615 //
616 // Single-precision Fused FP MAC
617 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
618 InstrStage<1, [A9_MUX0], 0>,
619 InstrStage<1, [A9_DRegsVFP], 0, Required>,
620 InstrStage<9, [A9_DRegsN], 0, Reserved>,
621 InstrStage<1, [A9_NPipe]>],
622 [8, 1, 1, 1]>,
623 //
624 // Double-precision Fused FP MAC
625 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626 InstrStage<1, [A9_MUX0], 0>,
627 InstrStage<1, [A9_DRegsVFP], 0, Required>,
628 InstrStage<10, [A9_DRegsN], 0, Reserved>,
629 InstrStage<2, [A9_NPipe]>],
630 [9, 1, 1, 1]>,
631 //
632 // Single-precision FP DIV
633 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
634 InstrStage<1, [A9_MUX0], 0>,
635 InstrStage<1, [A9_DRegsVFP], 0, Required>,
636 InstrStage<16, [A9_DRegsN], 0, Reserved>,
637 InstrStage<10, [A9_NPipe]>],
638 [15, 1, 1]>,
639 //
640 // Double-precision FP DIV
641 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
642 InstrStage<1, [A9_MUX0], 0>,
643 InstrStage<1, [A9_DRegsVFP], 0, Required>,
644 InstrStage<26, [A9_DRegsN], 0, Reserved>,
645 InstrStage<20, [A9_NPipe]>],
646 [25, 1, 1]>,
647 //
648 // Single-precision FP SQRT
649 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
650 InstrStage<1, [A9_MUX0], 0>,
651 InstrStage<1, [A9_DRegsVFP], 0, Required>,
652 InstrStage<18, [A9_DRegsN], 0, Reserved>,
653 InstrStage<13, [A9_NPipe]>],
654 [17, 1]>,
655 //
656 // Double-precision FP SQRT
657 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
658 InstrStage<1, [A9_MUX0], 0>,
659 InstrStage<1, [A9_DRegsVFP], 0, Required>,
660 InstrStage<33, [A9_DRegsN], 0, Reserved>,
661 InstrStage<28, [A9_NPipe]>],
662 [32, 1]>,
663
664 //
665 // Integer to Single-precision Move
666 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
667 InstrStage<1, [A9_MUX0], 0>,
668 InstrStage<1, [A9_DRegsVFP], 0, Required>,
669 // Extra 1 latency cycle since wbck is 2 cycles
670 InstrStage<3, [A9_DRegsN], 0, Reserved>,
671 InstrStage<1, [A9_NPipe]>],
672 [1, 1]>,
673 //
674 // Integer to Double-precision Move
675 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676 InstrStage<1, [A9_MUX0], 0>,
677 InstrStage<1, [A9_DRegsVFP], 0, Required>,
678 // Extra 1 latency cycle since wbck is 2 cycles
679 InstrStage<3, [A9_DRegsN], 0, Reserved>,
680 InstrStage<1, [A9_NPipe]>],
681 [1, 1, 1]>,
682 //
683 // Single-precision to Integer Move
684 //
685 // On A9 move-from-VFP is free to issue with no stall if other VFP
686 // operations are in flight. I assume it still can't dual-issue though.
687 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
688 InstrStage<1, [A9_MUX0], 0>],
689 [2, 1]>,
690 //
691 // Double-precision to Integer Move
692 //
693 // On A9 move-from-VFP is free to issue with no stall if other VFP
694 // operations are in flight. I assume it still can't dual-issue though.
695 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696 InstrStage<1, [A9_MUX0], 0>],
697 [2, 1, 1]>,
698 //
699 // Single-precision FP Load
700 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
701 InstrStage<1, [A9_MUX0], 0>,
702 InstrStage<1, [A9_DRegsVFP], 0, Required>,
703 InstrStage<2, [A9_DRegsN], 0, Reserved>,
704 InstrStage<1, [A9_NPipe], 0>,
705 InstrStage<1, [A9_LSUnit]>],
706 [1, 1]>,
707 //
708 // Double-precision FP Load
709 // FIXME: Result latency is 1 if address is 64-bit aligned.
710 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
711 InstrStage<1, [A9_MUX0], 0>,
712 InstrStage<1, [A9_DRegsVFP], 0, Required>,
713 InstrStage<2, [A9_DRegsN], 0, Reserved>,
714 InstrStage<1, [A9_NPipe], 0>,
715 InstrStage<1, [A9_LSUnit]>],
716 [2, 1]>,
717 //
718 // FP Load Multiple
719 // FIXME: assumes 2 doubles which requires 2 LS cycles.
720 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721 InstrStage<1, [A9_MUX0], 0>,
722 InstrStage<1, [A9_DRegsVFP], 0, Required>,
723 InstrStage<2, [A9_DRegsN], 0, Reserved>,
724 InstrStage<1, [A9_NPipe], 0>,
725 InstrStage<2, [A9_LSUnit]>],
726 [1, 1, 1, 1], [], -1>, // dynamic uops
727 //
728 // FP Load Multiple + update
729 // FIXME: assumes 2 doubles which requires 2 LS cycles.
730 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
731 InstrStage<1, [A9_MUX0], 0>,
732 InstrStage<1, [A9_DRegsVFP], 0, Required>,
733 InstrStage<2, [A9_DRegsN], 0, Reserved>,
734 InstrStage<1, [A9_NPipe], 0>,
735 InstrStage<2, [A9_LSUnit]>],
736 [2, 1, 1, 1], [], -1>, // dynamic uops
737 //
738 // Single-precision FP Store
739 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
740 InstrStage<1, [A9_MUX0], 0>,
741 InstrStage<1, [A9_DRegsVFP], 0, Required>,
742 InstrStage<2, [A9_DRegsN], 0, Reserved>,
743 InstrStage<1, [A9_NPipe], 0>,
744 InstrStage<1, [A9_LSUnit]>],
745 [1, 1]>,
746 //
747 // Double-precision FP Store
748 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749 InstrStage<1, [A9_MUX0], 0>,
750 InstrStage<1, [A9_DRegsVFP], 0, Required>,
751 InstrStage<2, [A9_DRegsN], 0, Reserved>,
752 InstrStage<1, [A9_NPipe], 0>,
753 InstrStage<1, [A9_LSUnit]>],
754 [1, 1]>,
755 //
756 // FP Store Multiple
757 // FIXME: assumes 2 doubles which requires 2 LS cycles.
758 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
759 InstrStage<1, [A9_MUX0], 0>,
760 InstrStage<1, [A9_DRegsVFP], 0, Required>,
761 InstrStage<2, [A9_DRegsN], 0, Reserved>,
762 InstrStage<1, [A9_NPipe], 0>,
763 InstrStage<2, [A9_LSUnit]>],
764 [1, 1, 1, 1], [], -1>, // dynamic uops
765 //
766 // FP Store Multiple + update
767 // FIXME: assumes 2 doubles which requires 2 LS cycles.
768 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
769 InstrStage<1, [A9_MUX0], 0>,
770 InstrStage<1, [A9_DRegsVFP], 0, Required>,
771 InstrStage<2, [A9_DRegsN], 0, Reserved>,
772 InstrStage<1, [A9_NPipe], 0>,
773 InstrStage<2, [A9_LSUnit]>],
774 [2, 1, 1, 1], [], -1>, // dynamic uops
775 // NEON
776 // VLD1
777 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
778 InstrStage<1, [A9_MUX0], 0>,
779 InstrStage<1, [A9_DRegsN], 0, Required>,
780 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
781 InstrStage<1, [A9_NPipe], 0>,
782 InstrStage<1, [A9_LSUnit]>],
783 [1, 1]>,
784 // VLD1x2
785 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
786 InstrStage<1, [A9_MUX0], 0>,
787 InstrStage<1, [A9_DRegsN], 0, Required>,
788 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
789 InstrStage<1, [A9_NPipe], 0>,
790 InstrStage<1, [A9_LSUnit]>],
791 [1, 1, 1]>,
792 // VLD1x3
793 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
794 InstrStage<1, [A9_MUX0], 0>,
795 InstrStage<1, [A9_DRegsN], 0, Required>,
796 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
797 InstrStage<2, [A9_NPipe], 0>,
798 InstrStage<2, [A9_LSUnit]>],
799 [1, 1, 2, 1]>,
800 // VLD1x4
801 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
802 InstrStage<1, [A9_MUX0], 0>,
803 InstrStage<1, [A9_DRegsN], 0, Required>,
804 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
805 InstrStage<2, [A9_NPipe], 0>,
806 InstrStage<2, [A9_LSUnit]>],
807 [1, 1, 2, 2, 1]>,
808 // VLD1u
809 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
810 InstrStage<1, [A9_MUX0], 0>,
811 InstrStage<1, [A9_DRegsN], 0, Required>,
812 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
813 InstrStage<1, [A9_NPipe], 0>,
814 InstrStage<1, [A9_LSUnit]>],
815 [1, 2, 1]>,
816 // VLD1x2u
817 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
818 InstrStage<1, [A9_MUX0], 0>,
819 InstrStage<1, [A9_DRegsN], 0, Required>,
820 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
821 InstrStage<1, [A9_NPipe], 0>,
822 InstrStage<1, [A9_LSUnit]>],
823 [1, 1, 2, 1]>,
824 // VLD1x3u
825 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
826 InstrStage<1, [A9_MUX0], 0>,
827 InstrStage<1, [A9_DRegsN], 0, Required>,
828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
829 InstrStage<2, [A9_NPipe], 0>,
830 InstrStage<2, [A9_LSUnit]>],
831 [1, 1, 2, 2, 1]>,
832 // VLD1x4u
833 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
834 InstrStage<1, [A9_MUX0], 0>,
835 InstrStage<1, [A9_DRegsN], 0, Required>,
836 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
837 InstrStage<2, [A9_NPipe], 0>,
838 InstrStage<2, [A9_LSUnit]>],
839 [1, 1, 2, 2, 2, 1]>,
840 //
841 // VLD1ln
842 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
843 InstrStage<1, [A9_MUX0], 0>,
844 InstrStage<1, [A9_DRegsN], 0, Required>,
845 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846 InstrStage<2, [A9_NPipe], 0>,
847 InstrStage<2, [A9_LSUnit]>],
848 [3, 1, 1, 1]>,
849 //
850 // VLD1lnu
851 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852 InstrStage<1, [A9_MUX0], 0>,
853 InstrStage<1, [A9_DRegsN], 0, Required>,
854 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
855 InstrStage<2, [A9_NPipe], 0>,
856 InstrStage<2, [A9_LSUnit]>],
857 [3, 2, 1, 1, 1, 1]>,
858 //
859 // VLD1dup
860 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
861 InstrStage<1, [A9_MUX0], 0>,
862 InstrStage<1, [A9_DRegsN], 0, Required>,
863 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
864 InstrStage<1, [A9_NPipe], 0>,
865 InstrStage<1, [A9_LSUnit]>],
866 [2, 1]>,
867 //
868 // VLD1dupu
869 InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
870 InstrStage<1, [A9_MUX0], 0>,
871 InstrStage<1, [A9_DRegsN], 0, Required>,
872 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
873 InstrStage<1, [A9_NPipe], 0>,
874 InstrStage<1, [A9_LSUnit]>],
875 [2, 2, 1, 1]>,
876 //
877 // VLD2
878 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
879 InstrStage<1, [A9_MUX0], 0>,
880 InstrStage<1, [A9_DRegsN], 0, Required>,
881 // Extra latency cycles since wbck is 7 cycles
882 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
883 InstrStage<1, [A9_NPipe], 0>,
884 InstrStage<1, [A9_LSUnit]>],
885 [2, 2, 1]>,
886 //
887 // VLD2x2
888 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
889 InstrStage<1, [A9_MUX0], 0>,
890 InstrStage<1, [A9_DRegsN], 0, Required>,
891 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
892 InstrStage<2, [A9_NPipe], 0>,
893 InstrStage<2, [A9_LSUnit]>],
894 [2, 3, 2, 3, 1]>,
895 //
896 // VLD2ln
897 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
898 InstrStage<1, [A9_MUX0], 0>,
899 InstrStage<1, [A9_DRegsN], 0, Required>,
900 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
901 InstrStage<2, [A9_NPipe], 0>,
902 InstrStage<2, [A9_LSUnit]>],
903 [3, 3, 1, 1, 1, 1]>,
904 //
905 // VLD2u
906 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
907 InstrStage<1, [A9_MUX0], 0>,
908 InstrStage<1, [A9_DRegsN], 0, Required>,
909 // Extra latency cycles since wbck is 7 cycles
910 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
911 InstrStage<1, [A9_NPipe], 0>,
912 InstrStage<1, [A9_LSUnit]>],
913 [2, 2, 2, 1, 1, 1]>,
914 //
915 // VLD2x2u
916 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
917 InstrStage<1, [A9_MUX0], 0>,
918 InstrStage<1, [A9_DRegsN], 0, Required>,
919 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
920 InstrStage<2, [A9_NPipe], 0>,
921 InstrStage<2, [A9_LSUnit]>],
922 [2, 3, 2, 3, 2, 1]>,
923 //
924 // VLD2lnu
925 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
926 InstrStage<1, [A9_MUX0], 0>,
927 InstrStage<1, [A9_DRegsN], 0, Required>,
928 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
929 InstrStage<2, [A9_NPipe], 0>,
930 InstrStage<2, [A9_LSUnit]>],
931 [3, 3, 2, 1, 1, 1, 1, 1]>,
932 //
933 // VLD2dup
934 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
935 InstrStage<1, [A9_MUX0], 0>,
936 InstrStage<1, [A9_DRegsN], 0, Required>,
937 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
938 InstrStage<1, [A9_NPipe], 0>,
939 InstrStage<1, [A9_LSUnit]>],
940 [2, 2, 1]>,
941 //
942 // VLD2dupu
943 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
944 InstrStage<1, [A9_MUX0], 0>,
945 InstrStage<1, [A9_DRegsN], 0, Required>,
946 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
947 InstrStage<1, [A9_NPipe], 0>,
948 InstrStage<1, [A9_LSUnit]>],
949 [2, 2, 2, 1, 1]>,
950 //
951 // VLD3
952 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
953 InstrStage<1, [A9_MUX0], 0>,
954 InstrStage<1, [A9_DRegsN], 0, Required>,
955 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
956 InstrStage<3, [A9_NPipe], 0>,
957 InstrStage<3, [A9_LSUnit]>],
958 [3, 3, 4, 1]>,
959 //
960 // VLD3ln
961 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
962 InstrStage<1, [A9_MUX0], 0>,
963 InstrStage<1, [A9_DRegsN], 0, Required>,
964 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
965 InstrStage<5, [A9_NPipe], 0>,
966 InstrStage<5, [A9_LSUnit]>],
967 [5, 5, 6, 1, 1, 1, 1, 2]>,
968 //
969 // VLD3u
970 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
971 InstrStage<1, [A9_MUX0], 0>,
972 InstrStage<1, [A9_DRegsN], 0, Required>,
973 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
974 InstrStage<3, [A9_NPipe], 0>,
975 InstrStage<3, [A9_LSUnit]>],
976 [3, 3, 4, 2, 1]>,
977 //
978 // VLD3lnu
979 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
980 InstrStage<1, [A9_MUX0], 0>,
981 InstrStage<1, [A9_DRegsN], 0, Required>,
982 InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
983 InstrStage<5, [A9_NPipe], 0>,
984 InstrStage<5, [A9_LSUnit]>],
985 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
986 //
987 // VLD3dup
988 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
989 InstrStage<1, [A9_MUX0], 0>,
990 InstrStage<1, [A9_DRegsN], 0, Required>,
991 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
992 InstrStage<3, [A9_NPipe], 0>,
993 InstrStage<3, [A9_LSUnit]>],
994 [3, 3, 4, 1]>,
995 //
996 // VLD3dupu
997 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
998 InstrStage<1, [A9_MUX0], 0>,
999 InstrStage<1, [A9_DRegsN], 0, Required>,
1000 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1001 InstrStage<3, [A9_NPipe], 0>,
1002 InstrStage<3, [A9_LSUnit]>],
1003 [3, 3, 4, 2, 1, 1]>,
1004 //
1005 // VLD4
1006 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1007 InstrStage<1, [A9_MUX0], 0>,
1008 InstrStage<1, [A9_DRegsN], 0, Required>,
1009 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1010 InstrStage<3, [A9_NPipe], 0>,
1011 InstrStage<3, [A9_LSUnit]>],
1012 [3, 3, 4, 4, 1]>,
1013 //
1014 // VLD4ln
1015 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1016 InstrStage<1, [A9_MUX0], 0>,
1017 InstrStage<1, [A9_DRegsN], 0, Required>,
1018 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1019 InstrStage<4, [A9_NPipe], 0>,
1020 InstrStage<4, [A9_LSUnit]>],
1021 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1022 //
1023 // VLD4u
1024 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025 InstrStage<1, [A9_MUX0], 0>,
1026 InstrStage<1, [A9_DRegsN], 0, Required>,
1027 InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1028 InstrStage<3, [A9_NPipe], 0>,
1029 InstrStage<3, [A9_LSUnit]>],
1030 [3, 3, 4, 4, 2, 1]>,
1031 //
1032 // VLD4lnu
1033 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1034 InstrStage<1, [A9_MUX0], 0>,
1035 InstrStage<1, [A9_DRegsN], 0, Required>,
1036 InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1037 InstrStage<4, [A9_NPipe], 0>,
1038 InstrStage<4, [A9_LSUnit]>],
1039 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1040 //
1041 // VLD4dup
1042 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1043 InstrStage<1, [A9_MUX0], 0>,
1044 InstrStage<1, [A9_DRegsN], 0, Required>,
1045 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1046 InstrStage<2, [A9_NPipe], 0>,
1047 InstrStage<2, [A9_LSUnit]>],
1048 [2, 2, 3, 3, 1]>,
1049 //
1050 // VLD4dupu
1051 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1052 InstrStage<1, [A9_MUX0], 0>,
1053 InstrStage<1, [A9_DRegsN], 0, Required>,
1054 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1055 InstrStage<2, [A9_NPipe], 0>,
1056 InstrStage<2, [A9_LSUnit]>],
1057 [2, 2, 3, 3, 2, 1, 1]>,
1058 //
1059 // VST1
1060 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1061 InstrStage<1, [A9_MUX0], 0>,
1062 InstrStage<1, [A9_DRegsN], 0, Required>,
1063 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1064 InstrStage<1, [A9_NPipe], 0>,
1065 InstrStage<1, [A9_LSUnit]>],
1066 [1, 1, 1]>,
1067 //
1068 // VST1x2
1069 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1070 InstrStage<1, [A9_MUX0], 0>,
1071 InstrStage<1, [A9_DRegsN], 0, Required>,
1072 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1073 InstrStage<1, [A9_NPipe], 0>,
1074 InstrStage<1, [A9_LSUnit]>],
1075 [1, 1, 1, 1]>,
1076 //
1077 // VST1x3
1078 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1079 InstrStage<1, [A9_MUX0], 0>,
1080 InstrStage<1, [A9_DRegsN], 0, Required>,
1081 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1082 InstrStage<2, [A9_NPipe], 0>,
1083 InstrStage<2, [A9_LSUnit]>],
1084 [1, 1, 1, 1, 2]>,
1085 //
1086 // VST1x4
1087 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1088 InstrStage<1, [A9_MUX0], 0>,
1089 InstrStage<1, [A9_DRegsN], 0, Required>,
1090 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1091 InstrStage<2, [A9_NPipe], 0>,
1092 InstrStage<2, [A9_LSUnit]>],
1093 [1, 1, 1, 1, 2, 2]>,
1094 //
1095 // VST1u
1096 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1097 InstrStage<1, [A9_MUX0], 0>,
1098 InstrStage<1, [A9_DRegsN], 0, Required>,
1099 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1100 InstrStage<1, [A9_NPipe], 0>,
1101 InstrStage<1, [A9_LSUnit]>],
1102 [2, 1, 1, 1, 1]>,
1103 //
1104 // VST1x2u
1105 InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1106 InstrStage<1, [A9_MUX0], 0>,
1107 InstrStage<1, [A9_DRegsN], 0, Required>,
1108 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1109 InstrStage<1, [A9_NPipe], 0>,
1110 InstrStage<1, [A9_LSUnit]>],
1111 [2, 1, 1, 1, 1, 1]>,
1112 //
1113 // VST1x3u
1114 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1115 InstrStage<1, [A9_MUX0], 0>,
1116 InstrStage<1, [A9_DRegsN], 0, Required>,
1117 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1118 InstrStage<2, [A9_NPipe], 0>,
1119 InstrStage<2, [A9_LSUnit]>],
1120 [2, 1, 1, 1, 1, 1, 2]>,
1121 //
1122 // VST1x4u
1123 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1124 InstrStage<1, [A9_MUX0], 0>,
1125 InstrStage<1, [A9_DRegsN], 0, Required>,
1126 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1127 InstrStage<2, [A9_NPipe], 0>,
1128 InstrStage<2, [A9_LSUnit]>],
1129 [2, 1, 1, 1, 1, 1, 2, 2]>,
1130 //
1131 // VST1ln
1132 InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1133 InstrStage<1, [A9_MUX0], 0>,
1134 InstrStage<1, [A9_DRegsN], 0, Required>,
1135 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1136 InstrStage<1, [A9_NPipe], 0>,
1137 InstrStage<1, [A9_LSUnit]>],
1138 [1, 1, 1]>,
1139 //
1140 // VST1lnu
1141 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142 InstrStage<1, [A9_MUX0], 0>,
1143 InstrStage<1, [A9_DRegsN], 0, Required>,
1144 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1145 InstrStage<1, [A9_NPipe], 0>,
1146 InstrStage<1, [A9_LSUnit]>],
1147 [2, 1, 1, 1, 1]>,
1148 //
1149 // VST2
1150 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1151 InstrStage<1, [A9_MUX0], 0>,
1152 InstrStage<1, [A9_DRegsN], 0, Required>,
1153 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1154 InstrStage<1, [A9_NPipe], 0>,
1155 InstrStage<1, [A9_LSUnit]>],
1156 [1, 1, 1, 1]>,
1157 //
1158 // VST2x2
1159 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160 InstrStage<1, [A9_MUX0], 0>,
1161 InstrStage<1, [A9_DRegsN], 0, Required>,
1162 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1163 InstrStage<3, [A9_NPipe], 0>,
1164 InstrStage<3, [A9_LSUnit]>],
1165 [1, 1, 1, 1, 2, 2]>,
1166 //
1167 // VST2u
1168 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169 InstrStage<1, [A9_MUX0], 0>,
1170 InstrStage<1, [A9_DRegsN], 0, Required>,
1171 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1172 InstrStage<1, [A9_NPipe], 0>,
1173 InstrStage<1, [A9_LSUnit]>],
1174 [2, 1, 1, 1, 1, 1]>,
1175 //
1176 // VST2x2u
1177 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178 InstrStage<1, [A9_MUX0], 0>,
1179 InstrStage<1, [A9_DRegsN], 0, Required>,
1180 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1181 InstrStage<3, [A9_NPipe], 0>,
1182 InstrStage<3, [A9_LSUnit]>],
1183 [2, 1, 1, 1, 1, 1, 2, 2]>,
1184 //
1185 // VST2ln
1186 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187 InstrStage<1, [A9_MUX0], 0>,
1188 InstrStage<1, [A9_DRegsN], 0, Required>,
1189 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1190 InstrStage<1, [A9_NPipe], 0>,
1191 InstrStage<1, [A9_LSUnit]>],
1192 [1, 1, 1, 1]>,
1193 //
1194 // VST2lnu
1195 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196 InstrStage<1, [A9_MUX0], 0>,
1197 InstrStage<1, [A9_DRegsN], 0, Required>,
1198 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1199 InstrStage<1, [A9_NPipe], 0>,
1200 InstrStage<1, [A9_LSUnit]>],
1201 [2, 1, 1, 1, 1, 1]>,
1202 //
1203 // VST3
1204 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1205 InstrStage<1, [A9_MUX0], 0>,
1206 InstrStage<1, [A9_DRegsN], 0, Required>,
1207 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1208 InstrStage<2, [A9_NPipe], 0>,
1209 InstrStage<2, [A9_LSUnit]>],
1210 [1, 1, 1, 1, 2]>,
1211 //
1212 // VST3u
1213 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1214 InstrStage<1, [A9_MUX0], 0>,
1215 InstrStage<1, [A9_DRegsN], 0, Required>,
1216 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1217 InstrStage<2, [A9_NPipe], 0>,
1218 InstrStage<2, [A9_LSUnit]>],
1219 [2, 1, 1, 1, 1, 1, 2]>,
1220 //
1221 // VST3ln
1222 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1223 InstrStage<1, [A9_MUX0], 0>,
1224 InstrStage<1, [A9_DRegsN], 0, Required>,
1225 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1226 InstrStage<3, [A9_NPipe], 0>,
1227 InstrStage<3, [A9_LSUnit]>],
1228 [1, 1, 1, 1, 2]>,
1229 //
1230 // VST3lnu
1231 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1232 InstrStage<1, [A9_MUX0], 0>,
1233 InstrStage<1, [A9_DRegsN], 0, Required>,
1234 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1235 InstrStage<3, [A9_NPipe], 0>,
1236 InstrStage<3, [A9_LSUnit]>],
1237 [2, 1, 1, 1, 1, 1, 2]>,
1238 //
1239 // VST4
1240 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1241 InstrStage<1, [A9_MUX0], 0>,
1242 InstrStage<1, [A9_DRegsN], 0, Required>,
1243 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1244 InstrStage<2, [A9_NPipe], 0>,
1245 InstrStage<2, [A9_LSUnit]>],
1246 [1, 1, 1, 1, 2, 2]>,
1247 //
1248 // VST4u
1249 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1250 InstrStage<1, [A9_MUX0], 0>,
1251 InstrStage<1, [A9_DRegsN], 0, Required>,
1252 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1253 InstrStage<2, [A9_NPipe], 0>,
1254 InstrStage<2, [A9_LSUnit]>],
1255 [2, 1, 1, 1, 1, 1, 2, 2]>,
1256 //
1257 // VST4ln
1258 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259 InstrStage<1, [A9_MUX0], 0>,
1260 InstrStage<1, [A9_DRegsN], 0, Required>,
1261 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1262 InstrStage<2, [A9_NPipe], 0>,
1263 InstrStage<2, [A9_LSUnit]>],
1264 [1, 1, 1, 1, 2, 2]>,
1265 //
1266 // VST4lnu
1267 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268 InstrStage<1, [A9_MUX0], 0>,
1269 InstrStage<1, [A9_DRegsN], 0, Required>,
1270 InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1271 InstrStage<2, [A9_NPipe], 0>,
1272 InstrStage<2, [A9_LSUnit]>],
1273 [2, 1, 1, 1, 1, 1, 2, 2]>,
1274
1275 //
1276 // Double-register Integer Unary
1277 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1278 InstrStage<1, [A9_MUX0], 0>,
1279 InstrStage<1, [A9_DRegsN], 0, Required>,
1280 // Extra latency cycles since wbck is 6 cycles
1281 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1282 InstrStage<1, [A9_NPipe]>],
1283 [4, 2]>,
1284 //
1285 // Quad-register Integer Unary
1286 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1287 InstrStage<1, [A9_MUX0], 0>,
1288 InstrStage<1, [A9_DRegsN], 0, Required>,
1289 // Extra latency cycles since wbck is 6 cycles
1290 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1291 InstrStage<1, [A9_NPipe]>],
1292 [4, 2]>,
1293 //
1294 // Double-register Integer Q-Unary
1295 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1296 InstrStage<1, [A9_MUX0], 0>,
1297 InstrStage<1, [A9_DRegsN], 0, Required>,
1298 // Extra latency cycles since wbck is 6 cycles
1299 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1300 InstrStage<1, [A9_NPipe]>],
1301 [4, 1]>,
1302 //
1303 // Quad-register Integer Q-Unary
1304 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1305 InstrStage<1, [A9_MUX0], 0>,
1306 InstrStage<1, [A9_DRegsN], 0, Required>,
1307 // Extra latency cycles since wbck is 6 cycles
1308 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1309 InstrStage<1, [A9_NPipe]>],
1310 [4, 1]>,
1311 //
1312 // Double-register Integer Binary
1313 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1314 InstrStage<1, [A9_MUX0], 0>,
1315 InstrStage<1, [A9_DRegsN], 0, Required>,
1316 // Extra latency cycles since wbck is 6 cycles
1317 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1318 InstrStage<1, [A9_NPipe]>],
1319 [3, 2, 2]>,
1320 //
1321 // Quad-register Integer Binary
1322 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323 InstrStage<1, [A9_MUX0], 0>,
1324 InstrStage<1, [A9_DRegsN], 0, Required>,
1325 // Extra latency cycles since wbck is 6 cycles
1326 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327 InstrStage<1, [A9_NPipe]>],
1328 [3, 2, 2]>,
1329 //
1330 // Double-register Integer Subtract
1331 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332 InstrStage<1, [A9_MUX0], 0>,
1333 InstrStage<1, [A9_DRegsN], 0, Required>,
1334 // Extra latency cycles since wbck is 6 cycles
1335 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1336 InstrStage<1, [A9_NPipe]>],
1337 [3, 2, 1]>,
1338 //
1339 // Quad-register Integer Subtract
1340 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341 InstrStage<1, [A9_MUX0], 0>,
1342 InstrStage<1, [A9_DRegsN], 0, Required>,
1343 // Extra latency cycles since wbck is 6 cycles
1344 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1345 InstrStage<1, [A9_NPipe]>],
1346 [3, 2, 1]>,
1347 //
1348 // Double-register Integer Shift
1349 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350 InstrStage<1, [A9_MUX0], 0>,
1351 InstrStage<1, [A9_DRegsN], 0, Required>,
1352 // Extra latency cycles since wbck is 6 cycles
1353 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1354 InstrStage<1, [A9_NPipe]>],
1355 [3, 1, 1]>,
1356 //
1357 // Quad-register Integer Shift
1358 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1359 InstrStage<1, [A9_MUX0], 0>,
1360 InstrStage<1, [A9_DRegsN], 0, Required>,
1361 // Extra latency cycles since wbck is 6 cycles
1362 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1363 InstrStage<1, [A9_NPipe]>],
1364 [3, 1, 1]>,
1365 //
1366 // Double-register Integer Shift (4 cycle)
1367 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1368 InstrStage<1, [A9_MUX0], 0>,
1369 InstrStage<1, [A9_DRegsN], 0, Required>,
1370 // Extra latency cycles since wbck is 6 cycles
1371 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1372 InstrStage<1, [A9_NPipe]>],
1373 [4, 1, 1]>,
1374 //
1375 // Quad-register Integer Shift (4 cycle)
1376 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377 InstrStage<1, [A9_MUX0], 0>,
1378 InstrStage<1, [A9_DRegsN], 0, Required>,
1379 // Extra latency cycles since wbck is 6 cycles
1380 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381 InstrStage<1, [A9_NPipe]>],
1382 [4, 1, 1]>,
1383 //
1384 // Double-register Integer Binary (4 cycle)
1385 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386 InstrStage<1, [A9_MUX0], 0>,
1387 InstrStage<1, [A9_DRegsN], 0, Required>,
1388 // Extra latency cycles since wbck is 6 cycles
1389 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1390 InstrStage<1, [A9_NPipe]>],
1391 [4, 2, 2]>,
1392 //
1393 // Quad-register Integer Binary (4 cycle)
1394 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1395 InstrStage<1, [A9_MUX0], 0>,
1396 InstrStage<1, [A9_DRegsN], 0, Required>,
1397 // Extra latency cycles since wbck is 6 cycles
1398 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1399 InstrStage<1, [A9_NPipe]>],
1400 [4, 2, 2]>,
1401 //
1402 // Double-register Integer Subtract (4 cycle)
1403 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1404 InstrStage<1, [A9_MUX0], 0>,
1405 InstrStage<1, [A9_DRegsN], 0, Required>,
1406 // Extra latency cycles since wbck is 6 cycles
1407 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1408 InstrStage<1, [A9_NPipe]>],
1409 [4, 2, 1]>,
1410 //
1411 // Quad-register Integer Subtract (4 cycle)
1412 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1413 InstrStage<1, [A9_MUX0], 0>,
1414 InstrStage<1, [A9_DRegsN], 0, Required>,
1415 // Extra latency cycles since wbck is 6 cycles
1416 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1417 InstrStage<1, [A9_NPipe]>],
1418 [4, 2, 1]>,
1419
1420 //
1421 // Double-register Integer Count
1422 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1423 InstrStage<1, [A9_MUX0], 0>,
1424 InstrStage<1, [A9_DRegsN], 0, Required>,
1425 // Extra latency cycles since wbck is 6 cycles
1426 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1427 InstrStage<1, [A9_NPipe]>],
1428 [3, 2, 2]>,
1429 //
1430 // Quad-register Integer Count
1431 // Result written in N3, but that is relative to the last cycle of multicycle,
1432 // so we use 4 for those cases
1433 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1434 InstrStage<1, [A9_MUX0], 0>,
1435 InstrStage<1, [A9_DRegsN], 0, Required>,
1436 // Extra latency cycles since wbck is 7 cycles
1437 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1438 InstrStage<2, [A9_NPipe]>],
1439 [4, 2, 2]>,
1440 //
1441 // Double-register Absolute Difference and Accumulate
1442 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1443 InstrStage<1, [A9_MUX0], 0>,
1444 InstrStage<1, [A9_DRegsN], 0, Required>,
1445 // Extra latency cycles since wbck is 6 cycles
1446 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1447 InstrStage<1, [A9_NPipe]>],
1448 [6, 3, 2, 1]>,
1449 //
1450 // Quad-register Absolute Difference and Accumulate
1451 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1452 InstrStage<1, [A9_MUX0], 0>,
1453 InstrStage<1, [A9_DRegsN], 0, Required>,
1454 // Extra latency cycles since wbck is 6 cycles
1455 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1456 InstrStage<2, [A9_NPipe]>],
1457 [6, 3, 2, 1]>,
1458 //
1459 // Double-register Integer Pair Add Long
1460 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461 InstrStage<1, [A9_MUX0], 0>,
1462 InstrStage<1, [A9_DRegsN], 0, Required>,
1463 // Extra latency cycles since wbck is 6 cycles
1464 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1465 InstrStage<1, [A9_NPipe]>],
1466 [6, 3, 1]>,
1467 //
1468 // Quad-register Integer Pair Add Long
1469 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470 InstrStage<1, [A9_MUX0], 0>,
1471 InstrStage<1, [A9_DRegsN], 0, Required>,
1472 // Extra latency cycles since wbck is 6 cycles
1473 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1474 InstrStage<2, [A9_NPipe]>],
1475 [6, 3, 1]>,
1476
1477 //
1478 // Double-register Integer Multiply (.8, .16)
1479 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1480 InstrStage<1, [A9_MUX0], 0>,
1481 InstrStage<1, [A9_DRegsN], 0, Required>,
1482 // Extra latency cycles since wbck is 6 cycles
1483 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1484 InstrStage<1, [A9_NPipe]>],
1485 [6, 2, 2]>,
1486 //
1487 // Quad-register Integer Multiply (.8, .16)
1488 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1489 InstrStage<1, [A9_MUX0], 0>,
1490 InstrStage<1, [A9_DRegsN], 0, Required>,
1491 // Extra latency cycles since wbck is 7 cycles
1492 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1493 InstrStage<2, [A9_NPipe]>],
1494 [7, 2, 2]>,
1495
1496 //
1497 // Double-register Integer Multiply (.32)
1498 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1499 InstrStage<1, [A9_MUX0], 0>,
1500 InstrStage<1, [A9_DRegsN], 0, Required>,
1501 // Extra latency cycles since wbck is 7 cycles
1502 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1503 InstrStage<2, [A9_NPipe]>],
1504 [7, 2, 1]>,
1505 //
1506 // Quad-register Integer Multiply (.32)
1507 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1508 InstrStage<1, [A9_MUX0], 0>,
1509 InstrStage<1, [A9_DRegsN], 0, Required>,
1510 // Extra latency cycles since wbck is 9 cycles
1511 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1512 InstrStage<4, [A9_NPipe]>],
1513 [9, 2, 1]>,
1514 //
1515 // Double-register Integer Multiply-Accumulate (.8, .16)
1516 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1517 InstrStage<1, [A9_MUX0], 0>,
1518 InstrStage<1, [A9_DRegsN], 0, Required>,
1519 // Extra latency cycles since wbck is 6 cycles
1520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1521 InstrStage<1, [A9_NPipe]>],
1522 [6, 3, 2, 2]>,
1523 //
1524 // Double-register Integer Multiply-Accumulate (.32)
1525 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1526 InstrStage<1, [A9_MUX0], 0>,
1527 InstrStage<1, [A9_DRegsN], 0, Required>,
1528 // Extra latency cycles since wbck is 7 cycles
1529 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1530 InstrStage<2, [A9_NPipe]>],
1531 [7, 3, 2, 1]>,
1532 //
1533 // Quad-register Integer Multiply-Accumulate (.8, .16)
1534 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1535 InstrStage<1, [A9_MUX0], 0>,
1536 InstrStage<1, [A9_DRegsN], 0, Required>,
1537 // Extra latency cycles since wbck is 7 cycles
1538 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1539 InstrStage<2, [A9_NPipe]>],
1540 [7, 3, 2, 2]>,
1541 //
1542 // Quad-register Integer Multiply-Accumulate (.32)
1543 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1544 InstrStage<1, [A9_MUX0], 0>,
1545 InstrStage<1, [A9_DRegsN], 0, Required>,
1546 // Extra latency cycles since wbck is 9 cycles
1547 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1548 InstrStage<4, [A9_NPipe]>],
1549 [9, 3, 2, 1]>,
1550
1551 //
1552 // Move
1553 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1554 InstrStage<1, [A9_MUX0], 0>,
1555 InstrStage<1, [A9_DRegsN], 0, Required>,
1556 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1557 InstrStage<1, [A9_NPipe]>],
1558 [1,1]>,
1559 //
1560 // Move Immediate
1561 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562 InstrStage<1, [A9_MUX0], 0>,
1563 InstrStage<1, [A9_DRegsN], 0, Required>,
1564 // Extra latency cycles since wbck is 6 cycles
1565 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1566 InstrStage<1, [A9_NPipe]>],
1567 [3]>,
1568 //
1569 // Double-register Permute Move
1570 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571 InstrStage<1, [A9_MUX0], 0>,
1572 InstrStage<1, [A9_DRegsN], 0, Required>,
1573 // Extra latency cycles since wbck is 6 cycles
1574 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1575 InstrStage<1, [A9_NPipe]>],
1576 [2, 1]>,
1577 //
1578 // Quad-register Permute Move
1579 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580 InstrStage<1, [A9_MUX0], 0>,
1581 InstrStage<1, [A9_DRegsN], 0, Required>,
1582 // Extra latency cycles since wbck is 6 cycles
1583 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1584 InstrStage<1, [A9_NPipe]>],
1585 [2, 1]>,
1586 //
1587 // Integer to Single-precision Move
1588 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1589 InstrStage<1, [A9_MUX0], 0>,
1590 InstrStage<1, [A9_DRegsN], 0, Required>,
1591 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1592 InstrStage<1, [A9_NPipe]>],
1593 [1, 1]>,
1594 //
1595 // Integer to Double-precision Move
1596 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1597 InstrStage<1, [A9_MUX0], 0>,
1598 InstrStage<1, [A9_DRegsN], 0, Required>,
1599 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1600 InstrStage<1, [A9_NPipe]>],
1601 [1, 1, 1]>,
1602 //
1603 // Single-precision to Integer Move
1604 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1605 InstrStage<1, [A9_MUX0], 0>,
1606 InstrStage<1, [A9_DRegsN], 0, Required>,
1607 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1608 InstrStage<1, [A9_NPipe]>],
1609 [2, 1]>,
1610 //
1611 // Double-precision to Integer Move
1612 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1613 InstrStage<1, [A9_MUX0], 0>,
1614 InstrStage<1, [A9_DRegsN], 0, Required>,
1615 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1616 InstrStage<1, [A9_NPipe]>],
1617 [2, 2, 1]>,
1618 //
1619 // Integer to Lane Move
1620 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621 InstrStage<1, [A9_MUX0], 0>,
1622 InstrStage<1, [A9_DRegsN], 0, Required>,
1623 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1624 InstrStage<2, [A9_NPipe]>],
1625 [3, 1, 1]>,
1626
1627 //
1628 // Vector narrow move
1629 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630 InstrStage<1, [A9_MUX0], 0>,
1631 InstrStage<1, [A9_DRegsN], 0, Required>,
1632 // Extra latency cycles since wbck is 6 cycles
1633 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1634 InstrStage<1, [A9_NPipe]>],
1635 [3, 1]>,
1636 //
1637 // Double-register FP Unary
1638 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1639 InstrStage<1, [A9_MUX0], 0>,
1640 InstrStage<1, [A9_DRegsN], 0, Required>,
1641 // Extra latency cycles since wbck is 6 cycles
1642 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1643 InstrStage<1, [A9_NPipe]>],
1644 [5, 2]>,
1645 //
1646 // Quad-register FP Unary
1647 // Result written in N5, but that is relative to the last cycle of multicycle,
1648 // so we use 6 for those cases
1649 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1650 InstrStage<1, [A9_MUX0], 0>,
1651 InstrStage<1, [A9_DRegsN], 0, Required>,
1652 // Extra latency cycles since wbck is 7 cycles
1653 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1654 InstrStage<2, [A9_NPipe]>],
1655 [6, 2]>,
1656 //
1657 // Double-register FP Binary
1658 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1659 // optimistic.
1660 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1661 InstrStage<1, [A9_MUX0], 0>,
1662 InstrStage<1, [A9_DRegsN], 0, Required>,
1663 // Extra latency cycles since wbck is 6 cycles
1664 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1665 InstrStage<1, [A9_NPipe]>],
1666 [5, 2, 2]>,
1667
1668 //
1669 // VPADD, etc.
1670 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1671 InstrStage<1, [A9_MUX0], 0>,
1672 InstrStage<1, [A9_DRegsN], 0, Required>,
1673 // Extra latency cycles since wbck is 6 cycles
1674 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1675 InstrStage<1, [A9_NPipe]>],
1676 [5, 1, 1]>,
1677 //
1678 // Double-register FP VMUL
1679 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680 InstrStage<1, [A9_MUX0], 0>,
1681 InstrStage<1, [A9_DRegsN], 0, Required>,
1682 // Extra latency cycles since wbck is 6 cycles
1683 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1684 InstrStage<1, [A9_NPipe]>],
1685 [5, 2, 1]>,
1686 //
1687 // Quad-register FP Binary
1688 // Result written in N5, but that is relative to the last cycle of multicycle,
1689 // so we use 6 for those cases
1690 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1691 // optimistic.
1692 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693 InstrStage<1, [A9_MUX0], 0>,
1694 InstrStage<1, [A9_DRegsN], 0, Required>,
1695 // Extra latency cycles since wbck is 7 cycles
1696 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1697 InstrStage<2, [A9_NPipe]>],
1698 [6, 2, 2]>,
1699 //
1700 // Quad-register FP VMUL
1701 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702 InstrStage<1, [A9_MUX0], 0>,
1703 InstrStage<1, [A9_DRegsN], 0, Required>,
1704 // Extra latency cycles since wbck is 7 cycles
1705 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1706 InstrStage<1, [A9_NPipe]>],
1707 [6, 2, 1]>,
1708 //
1709 // Double-register FP Multiple-Accumulate
1710 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1711 InstrStage<1, [A9_MUX0], 0>,
1712 InstrStage<1, [A9_DRegsN], 0, Required>,
1713 // Extra latency cycles since wbck is 7 cycles
1714 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1715 InstrStage<2, [A9_NPipe]>],
1716 [6, 3, 2, 1]>,
1717 //
1718 // Quad-register FP Multiple-Accumulate
1719 // Result written in N9, but that is relative to the last cycle of multicycle,
1720 // so we use 10 for those cases
1721 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1722 InstrStage<1, [A9_MUX0], 0>,
1723 InstrStage<1, [A9_DRegsN], 0, Required>,
1724 // Extra latency cycles since wbck is 9 cycles
1725 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1726 InstrStage<4, [A9_NPipe]>],
1727 [8, 4, 2, 1]>,
1728 //
1729 // Double-register Fused FP Multiple-Accumulate
1730 InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731 InstrStage<1, [A9_MUX0], 0>,
1732 InstrStage<1, [A9_DRegsN], 0, Required>,
1733 // Extra latency cycles since wbck is 7 cycles
1734 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1735 InstrStage<2, [A9_NPipe]>],
1736 [6, 3, 2, 1]>,
1737 //
1738 // Quad-register Fused FP Multiple-Accumulate
1739 // Result written in N9, but that is relative to the last cycle of multicycle,
1740 // so we use 10 for those cases
1741 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742 InstrStage<1, [A9_MUX0], 0>,
1743 InstrStage<1, [A9_DRegsN], 0, Required>,
1744 // Extra latency cycles since wbck is 9 cycles
1745 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1746 InstrStage<4, [A9_NPipe]>],
1747 [8, 4, 2, 1]>,
1748 //
1749 // Double-register Reciprocal Step
1750 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1751 InstrStage<1, [A9_MUX0], 0>,
1752 InstrStage<1, [A9_DRegsN], 0, Required>,
1753 // Extra latency cycles since wbck is 10 cycles
1754 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1755 InstrStage<1, [A9_NPipe]>],
1756 [9, 2, 2]>,
1757 //
1758 // Quad-register Reciprocal Step
1759 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1760 InstrStage<1, [A9_MUX0], 0>,
1761 InstrStage<1, [A9_DRegsN], 0, Required>,
1762 // Extra latency cycles since wbck is 11 cycles
1763 InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1764 InstrStage<2, [A9_NPipe]>],
1765 [10, 2, 2]>,
1766 //
1767 // Double-register Permute
1768 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1769 InstrStage<1, [A9_MUX0], 0>,
1770 InstrStage<1, [A9_DRegsN], 0, Required>,
1771 // Extra latency cycles since wbck is 6 cycles
1772 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1773 InstrStage<1, [A9_NPipe]>],
1774 [2, 2, 1, 1]>,
1775 //
1776 // Quad-register Permute
1777 // Result written in N2, but that is relative to the last cycle of multicycle,
1778 // so we use 3 for those cases
1779 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1780 InstrStage<1, [A9_MUX0], 0>,
1781 InstrStage<1, [A9_DRegsN], 0, Required>,
1782 // Extra latency cycles since wbck is 7 cycles
1783 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1784 InstrStage<2, [A9_NPipe]>],
1785 [3, 3, 1, 1]>,
1786 //
1787 // Quad-register Permute (3 cycle issue)
1788 // Result written in N2, but that is relative to the last cycle of multicycle,
1789 // so we use 4 for those cases
1790 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1791 InstrStage<1, [A9_MUX0], 0>,
1792 InstrStage<1, [A9_DRegsN], 0, Required>,
1793 // Extra latency cycles since wbck is 8 cycles
1794 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1795 InstrStage<3, [A9_NPipe]>],
1796 [4, 4, 1, 1]>,
1797
1798 //
1799 // Double-register VEXT
1800 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1801 InstrStage<1, [A9_MUX0], 0>,
1802 InstrStage<1, [A9_DRegsN], 0, Required>,
1803 // Extra latency cycles since wbck is 6 cycles
1804 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1805 InstrStage<1, [A9_NPipe]>],
1806 [2, 1, 1]>,
1807 //
1808 // Quad-register VEXT
1809 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1810 InstrStage<1, [A9_MUX0], 0>,
1811 InstrStage<1, [A9_DRegsN], 0, Required>,
1812 // Extra latency cycles since wbck is 7 cycles
1813 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1814 InstrStage<2, [A9_NPipe]>],
1815 [3, 1, 2]>,
1816 //
1817 // VTB
1818 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1819 InstrStage<1, [A9_MUX0], 0>,
1820 InstrStage<1, [A9_DRegsN], 0, Required>,
1821 // Extra latency cycles since wbck is 7 cycles
1822 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1823 InstrStage<2, [A9_NPipe]>],
1824 [3, 2, 1]>,
1825 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1826 InstrStage<1, [A9_MUX0], 0>,
1827 InstrStage<2, [A9_DRegsN], 0, Required>,
1828 // Extra latency cycles since wbck is 7 cycles
1829 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1830 InstrStage<2, [A9_NPipe]>],
1831 [3, 2, 2, 1]>,
1832 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1833 InstrStage<1, [A9_MUX0], 0>,
1834 InstrStage<2, [A9_DRegsN], 0, Required>,
1835 // Extra latency cycles since wbck is 8 cycles
1836 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1837 InstrStage<3, [A9_NPipe]>],
1838 [4, 2, 2, 3, 1]>,
1839 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1840 InstrStage<1, [A9_MUX0], 0>,
1841 InstrStage<1, [A9_DRegsN], 0, Required>,
1842 // Extra latency cycles since wbck is 8 cycles
1843 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1844 InstrStage<3, [A9_NPipe]>],
1845 [4, 2, 2, 3, 3, 1]>,
1846 //
1847 // VTBX
1848 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1849 InstrStage<1, [A9_MUX0], 0>,
1850 InstrStage<1, [A9_DRegsN], 0, Required>,
1851 // Extra latency cycles since wbck is 7 cycles
1852 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1853 InstrStage<2, [A9_NPipe]>],
1854 [3, 1, 2, 1]>,
1855 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1856 InstrStage<1, [A9_MUX0], 0>,
1857 InstrStage<1, [A9_DRegsN], 0, Required>,
1858 // Extra latency cycles since wbck is 7 cycles
1859 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1860 InstrStage<2, [A9_NPipe]>],
1861 [3, 1, 2, 2, 1]>,
1862 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1863 InstrStage<1, [A9_MUX0], 0>,
1864 InstrStage<1, [A9_DRegsN], 0, Required>,
1865 // Extra latency cycles since wbck is 8 cycles
1866 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1867 InstrStage<3, [A9_NPipe]>],
1868 [4, 1, 2, 2, 3, 1]>,
1869 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1870 InstrStage<1, [A9_MUX0], 0>,
1871 InstrStage<1, [A9_DRegsN], 0, Required>,
1872 // Extra latency cycles since wbck is 8 cycles
1873 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1874 InstrStage<2, [A9_NPipe]>],
1875 [4, 1, 2, 2, 3, 3, 1]>
1876]>;
1877
1878// ===---------------------------------------------------------------------===//
1879// The following definitions describe the simpler per-operand machine model.
1880// This works with MachineScheduler and will eventually replace itineraries.
1881
1a4d82fc
JJ
// Named holder for a list of WriteSequence records. A def of this class
// exposes the list through its Writes field (for example
// A9WriteLMOpsList.Writes[0-1]) so that SchedVar entries can take slices of it.
1882class A9WriteLMOpsListType<list<WriteSequence> writes> {
1883 list <WriteSequence> Writes = writes;
// Declared unset (?) here.
// NOTE(review): presumably present so that defs of this class can appear
// inside the `let SchedModel = CortexA9Model in` region - confirm.
1884 SchedMachineModel SchedModel = ?;
1885}
223e47cc
LB
1886
1887// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
1888def CortexA9Model : SchedMachineModel {
1889 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1a4d82fc 1890 let MicroOpBufferSize = 56; // Based on available renamed registers.
223e47cc
LB
1891 let LoadLatency = 2; // Optimistic load latency assuming bypass.
1892 // This is overridden by OperandCycles if the
1893 // Itineraries are queried instead.
1894 let MispredictPenalty = 8; // Based on estimate of pipeline depth.
1895
// Attach the CortexA9Itineraries tables to this model as well, so both the
// itinerary data and the per-operand definitions below hang off one model.
1896 let Itineraries = CortexA9Itineraries;
1a4d82fc
JJ
1897
1898 // FIXME: Many vector operations were never given an itinerary. We
1899 // haven't mapped these to the new model either.
1900 let CompleteModel = 0;
223e47cc
LB
1901}
1902
1903//===----------------------------------------------------------------------===//
1904// Define each kind of processor resource and number available.
1a4d82fc
JJ
1905//
1906// The AGU unit has BufferSize=1 so that the latency between operations
1907// that use it is considered to stall other operations.
1908//
1909// The FP unit has BufferSize=0 so that it is a hard dispatch
1910// hazard. No instruction may be dispatched while the unit is reserved.
1911
1912let SchedModel = CortexA9Model in {
223e47cc
LB
1913
// ALU resource with two units.
1914def A9UnitALU : ProcResource<2>;
// Single multiplier, declared as a sub-resource (Super) of A9UnitALU.
1915def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
// Single address generation unit; BufferSize = 1.
1a4d82fc 1916def A9UnitAGU : ProcResource<1> { let BufferSize = 1; }
// Single load/store unit.
223e47cc 1917def A9UnitLS : ProcResource<1>;
// Single FP unit; BufferSize = 0.
1a4d82fc 1918def A9UnitFP : ProcResource<1> { let BufferSize = 0; }
223e47cc
LB
// Single branch unit.
1919def A9UnitB : ProcResource<1>;
1920
1921//===----------------------------------------------------------------------===//
1922// Define scheduler read/write types with their resources and latency on A9.
1923
1924// Consume an issue slot, but no processor resources. This is useful when all
1925// other writes associated with the operand have NumMicroOps = 0.
1926def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
1927
1928// Write an integer register.
1929def A9WriteI : SchedWriteRes<[A9UnitALU]>;
1930// Write an integer shifted-by register
1931def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
1932
1933// Basic ALU.
1a4d82fc 1934def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
223e47cc 1935// ALU with operand shifted by immediate.
1a4d82fc 1936def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
223e47cc 1937// ALU with operand shifted by register.
1a4d82fc 1938def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
223e47cc
LB
1939
1940// Multiplication
// A9WriteM lists A9UnitMul twice in its resource list; latency 4.
1941def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
// A9WriteMHi has latency 5 and NumMicroOps = 0, so it adds no micro-op of its
// own. It is listed after A9WriteM in the ItinRW for the 32/64-bit multiplies.
// NOTE(review): presumably this is the write of the second (high) result
// register of a long multiply - confirm.
1942def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
1943 let NumMicroOps = 0; }
// 16-bit multiply variants: same shape as above with latencies 3 and 4.
1944def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
1945def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
1946 let NumMicroOps = 0; }
1947
1948// Floating-point
1949// Only one FP or AGU instruction may issue per cycle. We model this
1950// by having FP instructions consume the AGU resource.
1951def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1952def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1953def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1954def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1955def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
1956def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1957def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
1958def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
1959def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
1960def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
1961
1962// NEON has an odd mix of latencies. Simply name the write types by latency.
1963def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1964def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
1965def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
1966def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1967def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1968def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1969def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
1970def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1971def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
1972
1973// Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles has one entry per resource in the SchedWriteRes list, in
// order: 2 cycles of A9UnitFP, then 1 cycle of A9UnitAGU. Both entries are
// spelled out so the list length matches the resource list instead of relying
// on implicit padding of the missing A9UnitAGU entry.
1974def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1975 let Latency = 4;
1976 let ResourceCycles = [2, 1];
1977}
1978def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1979 let Latency = 7;
1980 let ResourceCycles = [2, 1];
1981}
1982def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1983 let Latency = 9;
1984 let ResourceCycles = [2, 1];
1985}
1986
1987// Branches don't have a def operand but still consume resources.
1988def A9WriteB : SchedWriteRes<[A9UnitB]>;
1989
1990// Address generation.
1991def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
1992
1993// Load Integer.
1994def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
1995// Load the upper 32-bits using the same micro-op.
1996def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
1997 let NumMicroOps = 0; }
1998// Offset shifted by register.
1999def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2000// Load (and zero extend) a byte.
2001def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2002def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
2003
2004// Load or Store Float, aligned.
2005def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
2006
2007// Store Integer.
2008def A9WriteS : SchedWriteRes<[A9UnitLS]>;
2009
2010//===----------------------------------------------------------------------===//
2011// Define resources dynamically for load multiple variants.
2012
2013// Define helpers for extra latency without consuming resources.
// A9WriteCycle1: one cycle of latency, empty resource list, no micro-ops.
2014def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
// A9WriteCycle2 .. A9WriteCycle8: a WriteSequence that repeats A9WriteCycle1
// NumCycles times.
2015foreach NumCycles = 2-8 in {
2016def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
2017} // foreach NumCycles
2018
223e47cc
LB
2019// Define address generation sequences and predicates for 8 flavors of LDMs.
2020foreach NumAddr = 1-8 in {
2021
2022// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
2023// latency for instructions that generate multiple loads or stores.
2024def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
2025
2026// Define a predicate to select the LDM based on number of memory addresses.
// A9LMAdr<N>Pred holds when (TII->getNumLDMAddresses(MI)+1)/2 == N.
// NOTE(review): assuming integer division, this halves the address count
// rounding up, so counts 2N-1 and 2N both select variant N - confirm.
2027def A9LMAdr#NumAddr#Pred :
1a4d82fc 2028 SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
223e47cc
LB
2029
2030} // foreach NumAddr
2031
2032// Fall-back for unknown LDMs.
2033def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
2034
2035// LDM/VLDM/VLDn address generation latency & resources.
2036// Dynamically select the A9WriteAdrN sequence using a predicate.
2037def A9WriteLMAdr : SchedWriteVariant<[
2038 SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
2039 SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
2040 SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
2041 SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
2042 SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
2043 SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
2044 SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
2045 SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
2046 // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
2047 SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
2048
2049// Define LDM Resources.
2050// These take no issue resource, so they can be combined with other
2051// writes like WriteB.
2052// A9WriteLMLo takes a single LS resource and 2 cycles.
2053def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
2054 let NumMicroOps = 0; }
2055// Assuming aligned access, the upper half of each pair is free with
2056// the same latency.
2057def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
2058 let NumMicroOps = 0; }
2059// Each A9WriteL#N variant adds N cycles of latency without consuming
2060// additional resources.
2061foreach NumAddr = 1-8 in {
2062def A9WriteL#NumAddr : WriteSequence<
2063 [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2064def A9WriteL#NumAddr#Hi : WriteSequence<
2065 [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2066}
2067
2068//===----------------------------------------------------------------------===//
2069// LDM: Load multiple into 32-bit integer registers.
2070
1a4d82fc
JJ
2071def A9WriteLMOpsList : A9WriteLMOpsListType<
2072 [A9WriteL1, A9WriteL1Hi,
2073 A9WriteL2, A9WriteL2Hi,
2074 A9WriteL3, A9WriteL3Hi,
2075 A9WriteL4, A9WriteL4Hi,
2076 A9WriteL5, A9WriteL5Hi,
2077 A9WriteL6, A9WriteL6Hi,
2078 A9WriteL7, A9WriteL7Hi,
2079 A9WriteL8, A9WriteL8Hi]>;
2080
223e47cc
LB
2081// A9WriteLM variants expand into a pair of writes for each 64-bit
2082// value loaded. When the number of registers is odd, the last
2083// A9WriteLnHi is naturally ignored because the instruction has no
2084// following def operands. These variants take no issue resource, so
2085// they may need to be part of a WriteSequence that includes A9WriteIssue.
2086def A9WriteLM : SchedWriteVariant<[
1a4d82fc
JJ
2087 SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
2088 SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
2089 SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
2090 SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
2091 SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
2092 SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
2093 SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
2094 SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
223e47cc
LB
2095 // For unknown LDMs, define the maximum number of writes, but only
2096 // make the first two consume resources.
2097 SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
2098 A9WriteL2, A9WriteL2Hi,
2099 A9WriteL3Hi, A9WriteL3Hi,
2100 A9WriteL4Hi, A9WriteL4Hi,
2101 A9WriteL5Hi, A9WriteL5Hi,
2102 A9WriteL6Hi, A9WriteL6Hi,
2103 A9WriteL7Hi, A9WriteL7Hi,
2104 A9WriteL8Hi, A9WriteL8Hi]>]> {
2105 let Variadic = 1;
2106}
2107
2108//===----------------------------------------------------------------------===//
2109// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
2110
2111// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
2112// so can be used in WriteSequences for single-issue instructions that
2113// encapsulate multiple loads.
2114def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
2115 let Latency = 1;
2116 let NumMicroOps = 0;
2117}
2118
2119foreach NumAddr = 1-8 in {
2120
2121// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
2122def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
2123
2124// A9WriteLfp1-8 definitions are statically expanded into a sequence of
2125// A9WriteLfpOps with additive latency that takes a single issue slot.
2126// Used directly to describe NEON VLDn.
2127def A9WriteLfp#NumAddr : WriteSequence<
2128 [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2129
2130// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
2131// permuting loaded values.
2132def A9WriteLfp#NumAddr#Mov : WriteSequence<
2133 [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2134
2135} // foreach NumAddr
2136
2137// Define VLDM/VSTM PreRA resources.
2138// A9WriteLMfpPreRA are dynamically expanded into the correct
2139// A9WriteLfp1-8 sequence based on a predicate. This supports the
2140// preRA VLDM variants in which all 64-bit loads are written to the
2141// same tuple of either single or double precision registers.
2142def A9WriteLMfpPreRA : SchedWriteVariant<[
2143 SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
2144 SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
2145 SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
2146 SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
2147 SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
2148 SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
2149 SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
2150 SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
2151 // For unknown VLDM/VSTM PreRA, assume 2xS registers.
2152 SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
2153
2154// Define VLDM/VSTM PostRA Resources.
2155// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
2156def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
2157
2158foreach NumAddr = 1-8 in {
2159
2160// Each A9WriteLMfp#N variant adds N cycles of latency without consuming
2161// additional resources.
2162def A9WriteLMfp#NumAddr : WriteSequence<
2163 [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2164
2165// Assuming aligned access, the upper half of each pair is free with
2166// the same latency.
2167def A9WriteLMfp#NumAddr#Hi : WriteSequence<
2168 [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2169
2170} // foreach NumAddr
2171
2172// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
2173// pair of writes for each 64-bit data loaded. When the number of
2174// registers is odd, the last WriteLMfpnHi is naturally ignored because
2175// the instruction has no following def operands.
1a4d82fc
JJ
2176
// Backing list that A9WriteLMfpPostRA slices by index.
//   indices 0-7  : A9WriteLMfp1 .. A9WriteLMfp8
//   index   8    : A9WriteLMfp1Hi (listed once)
//   indices 9-22 : A9WriteLMfp2Hi .. A9WriteLMfp8Hi, each listed twice
// The trailing comment on each row gives that row's index range.
2177def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
2178 [A9WriteLMfp1, A9WriteLMfp2, // 0-1
2179 A9WriteLMfp3, A9WriteLMfp4, // 2-3
2180 A9WriteLMfp5, A9WriteLMfp6, // 4-5
2181 A9WriteLMfp7, A9WriteLMfp8, // 6-7
2182 A9WriteLMfp1Hi, // 8-8
2183 A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
2184 A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
2185 A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
2186 A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
2187 A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
2188 A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
2189 A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
2190
// Variadic write variant selected by the A9LMAdr<N>Pred predicates.
// For N addresses, the slice takes entries 0..N-1 of
// A9WriteLMfpPostRAOpsList.Writes (A9WriteLMfp1..N) followed by N entries from
// the Hi part of the list, i.e. N + N writes in total.
223e47cc 2191def A9WriteLMfpPostRA : SchedWriteVariant<[
1a4d82fc
JJ
2192 SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
2193 SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
2194 SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
2195 SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
2196 SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
2197 SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
2198 SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
2199 SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
223e47cc 2200 // For unknown LDMs, define the maximum number of writes, but only
1a4d82fc
JJ
2201 // make the first two consume resources. We are optimizing for the case
2202 // where the operands are DPRs, and this determines the first eight
2203 // types. The remaining eight types are filled to cover the case
2204 // where the operands are SPRs.
2205 SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
2206 A9WriteLMfp3Hi, A9WriteLMfp4Hi,
2207 A9WriteLMfp5Hi, A9WriteLMfp6Hi,
2208 A9WriteLMfp7Hi, A9WriteLMfp8Hi,
223e47cc
LB
2209 A9WriteLMfp5Hi, A9WriteLMfp5Hi,
2210 A9WriteLMfp6Hi, A9WriteLMfp6Hi,
2211 A9WriteLMfp7Hi, A9WriteLMfp7Hi,
2212 A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
2213 let Variadic = 1;
2214}
2215
2216// Distinguish between our multiple MI-level forms of the same
2217// VLDM/VSTM instructions.
// A9PreRA: holds when the register in operand 0 of MI is a virtual register.
2218def A9PreRA : SchedPredicate<
2219 "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
// A9PostRA: holds when the register in operand 0 of MI is a physical register.
2220def A9PostRA : SchedPredicate<
2221 "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
2222
2223// VLDM represents all destination registers as a single register
2224// tuple, unlike LDM. So the number of write operands is not variadic.
2225def A9WriteLMfp : SchedWriteVariant<[
2226 SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
2227 SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
2228
2229//===----------------------------------------------------------------------===//
1a4d82fc 2230// Resources for other (non-LDM/VLDM) Variants.
223e47cc
LB
2231
2232// These mov immediate writers are unconditionally expanded with
2233// additive latency.
2234def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
1a4d82fc 2235def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
223e47cc
LB
2236def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
2237
2238// Some ALU operations can read loaded integer values one cycle early.
1a4d82fc 2239def A9ReadALU : SchedReadAdvance<1,
223e47cc
LB
2240 [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
2241 A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
2242 A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
2243 A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
2244 A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
2245
2246// Read types for operands that are unconditionally read in cycle N
2247// after the instruction issues, decreases producer latency by N-1.
2248def A9Read2 : SchedReadAdvance<1>;
2249def A9Read3 : SchedReadAdvance<2>;
2250def A9Read4 : SchedReadAdvance<3>;
2251
2252//===----------------------------------------------------------------------===//
2253// Map itinerary classes to scheduler read/write resources per operand.
2254//
2255// For ARM, we piggyback scheduler resources on the Itinerary classes
2256// to avoid perturbing the existing instruction definitions.
2257
2258// This table follows the ARM Cortex-A9 Technical Reference Manuals,
2259// mostly in order.
223e47cc 2260
1a4d82fc 2261def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
223e47cc
LB
2262 IIC_iMVNi,IIC_iMVNsi,
2263 IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
1a4d82fc 2264def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
223e47cc
LB
2265def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
2266
2267def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
2268def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
2269def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
2270
1a4d82fc
JJ
2271def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
2272def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
2273def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
2274def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
2275def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
2276def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
2277def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
2278def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
223e47cc
LB
2279
2280// A9WriteMHi is ignored for MUL32.
2281def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
2282 IIC_iMUL64,IIC_iMAC64]>;
2283// FIXME: SMLALxx needs itin classes
2284def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
2285
2286// TODO: For floating-point ops, we model the pipeline forwarding
2287// latencies here. WAW latencies are sometimes longer.
2288
2289def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
2290 IIC_fpUNA32, IIC_fpUNA64,
2291 IIC_fpCMP32, IIC_fpCMP64]>;
2292def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
2293def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
2294 IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
2295 IIC_fpALU32, IIC_fpALU64]>;
2296def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
2297def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
2298def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
2299def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
2300def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
2301def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
2302def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
2303def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
2304
2305def :ItinRW<[A9WriteB], [IIC_Br]>;
2306
2307// A9 PLD is processed in a dedicated unit.
2308def :ItinRW<[], [IIC_Preload]>;
2309
2310// Note: We must assume that loads are aligned, since the machine
2311// model cannot know this statically and A9 ignores alignment hints.
2312
2313// A9WriteAdr consumes AGU regardless of address writeback. But its
2314// latency is only relevant for users of an updated address.
2315def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
2316 IIC_iLoad_iu,IIC_iLoad_ru]>;
2317def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
2318def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
2319 IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
2320def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
2321def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
2322 IIC_iLoad_d_ru]>;
2323// Store either has no def operands, or the one def for address writeback.
2324def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
2325 IIC_iStore_iu, IIC_iStore_ru,
2326 IIC_iStore_d_i, IIC_iStore_d_r,
2327 IIC_iStore_d_ru]>;
2328def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
2329 IIC_iStore_bh_i, IIC_iStore_bh_r,
2330 IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
2331def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
2332
2333// A9WriteLM will be expanded into a separate write for each def
2334// operand. Address generation consumes resources, but A9WriteLMAdr
2335// is listed after all def operands, so has no effective latency.
2336//
2337// Note: A9WriteLM expands into an even number of def operands. The
2338// actual number of def operands may be less by one.
2339def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
2340
2341// Load multiple with address writeback has an extra def operand in
2342// front of the loaded registers.
2343//
2344// Reuse the load-multiple variants for store-multiple because the
2345// resources are identical. For stores, only the address writeback
2346// has a def operand so the WriteL latencies are unused.
2347def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
2348 IIC_iStore_m,
2349 IIC_iStore_mu]>;
2350def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
1a4d82fc 2351def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
223e47cc
LB
2352
2353def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
2354
2355def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
2356def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
2357def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
2358 IIC_fpStore_m, IIC_fpStore_mu]>;
2359
2360// Note: Unlike VLDM, VLD1 expects the writeback operand after the
2361// normal writes.
2362def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
2363 IIC_VLD1x2, IIC_VLD1x2u]>;
2364def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
2365 IIC_VLD1x4, IIC_VLD1x4u,
2366 IIC_VLD4dup, IIC_VLD4dupu]>;
2367def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
2368 IIC_VLD2, IIC_VLD2u,
2369 IIC_VLD2dup, IIC_VLD2dupu]>;
2370def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
2371 IIC_VLD2x2, IIC_VLD2x2u,
2372 IIC_VLD2ln, IIC_VLD2lnu]>;
2373def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
2374 IIC_VLD3dup, IIC_VLD3dupu]>;
2375def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
2376 IIC_VLD4ln, IIC_VLD4lnu]>;
2377def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
2378
2379// Vector stores use similar resources to vector loads, so use the
2380// same write types. The address write must be first for stores with
2381// address writeback.
//
// Only two entries are needed: one using the A9WriteAdr1/A9WriteLfp1 pair
// and one using the A9WriteAdr2/A9WriteLfp2 pair. Each covers both the plain
// and the 'u'-suffixed itinerary classes.
2382def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
2383 IIC_VST1x2, IIC_VST1x2u,
2384 IIC_VST1ln, IIC_VST1lnu,
2385 IIC_VST2, IIC_VST2u,
2386 IIC_VST2x2, IIC_VST2x2u,
2387 IIC_VST2ln, IIC_VST2lnu]>;
2388def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
2389 IIC_VST1x4, IIC_VST1x4u,
2390 IIC_VST3, IIC_VST3u,
2391 IIC_VST3ln, IIC_VST3lnu,
2392 IIC_VST4, IIC_VST4u,
2393 IIC_VST4ln, IIC_VST4lnu]>;
2394
2395// NEON moves.
// Each move class maps to a single write (A9WriteV1, A9WriteV2 or A9WriteV3)
// with no explicit read entries.
2396def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
2397def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
2398def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
2399
2400// NEON integer arithmetic
2401//
// In each entry the first list element is the write; any A9Read* elements
// that follow are the read entries for that class.
//
2402// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
2403def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
2404// VSUB/VMVN/VCLSD/VCLZD/VCNTD
2405def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
2406// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
2407// ...
2408// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
2409def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
1a4d82fc 2410
223e47cc
LB
2411// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
2412def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
2413// VQNEG/VQABS
2414def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
2415// VABS
2416def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
2417// VPADD/VPADDL are mapped later under IIC_SHLi.
2418// ...
2419// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
2420def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
2421// VMOVimm/VMVNimm/VORRimm/VBICimm
2422def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
// IIC_VABA* and IIC_VPALi* classes: both use A9WriteV6 with A9Read3 as the
// first read entry.
2423def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
2424def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
2425
2426// NEON integer multiply
2427//
2428// Note: these don't quite match the timing docs, but they do match
2429// the original A9 itinerary.
//
// The i16 classes use A9WriteV6 (D) / A9WriteV7 (Q); the i32 classes use
// A9Write2V7 (D) / A9Write2V9 (Q). The VMAC entries reuse the same writes as
// the corresponding VMUL entries and add a leading A9Read3 read.
2430def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
2431def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
2432def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
2433def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
2434def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
2435def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
2436def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
2437def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
2438
2439// NEON integer shift
2440// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
// D and Q forms currently share the same write: A9WriteV3 for IIC_VSHLi*,
// A9WriteV4 for IIC_VSHLi4*.
2441def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
2442def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
2443
2444// NEON permute
// These classes list two writes (both A9WriteV2) and no explicit reads.
1a4d82fc 2445def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
223e47cc
LB
// Two writes (A9WriteV3, A9WriteV4) followed by two read entries; the first
// read is left as ReadDefault.
2446def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
2447 [IIC_VPERMQ3, IIC_VEXTQ]>;
// IIC_VTB1..4: one write followed by one read entry per N in IIC_VTBN.
// The third and fourth reads use A9Read3 instead of A9Read2.
2448def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
2449def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
2450def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
2451def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
// IIC_VTBX1..4: same as the IIC_VTB entries with an extra leading
// ReadDefault read entry.
2452def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
2453def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
2454def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
2455def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
2456 [IIC_VTBX4]>;
2457
2458// NEON floating-point
// In every D/Q pair below the Q class uses the next-higher numbered write
// (V5 -> V6, V9 -> V10) with the same read entries.
2459def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
2460def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
2461def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
2462def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
// The MAC classes list A9Read3 as the first read entry.
2463def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
2464def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
2465def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
2466def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
1a4d82fc
JJ
2467
2468// Map SchedRWs that are identical for cortexa9 to existing resources.
// WriteALUsr and WriteALUSsr both alias to A9WriteALUsr; ReadALU and
// ReadALUsr both alias to A9ReadALU.
2469def : SchedAlias<WriteALU, A9WriteALU>;
2470def : SchedAlias<WriteALUsr, A9WriteALUsr>;
2471def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
2472def : SchedAlias<ReadALU, A9ReadALU>;
2473def : SchedAlias<ReadALUsr, A9ReadALU>;
// Per-opcode overrides for the AND/ORR/EOR/BIC families, grouped by operand
// form suffix: ri/rr -> WriteALU, rsi -> WriteALUsi, rsr -> WriteALUsr.
2474def : InstRW< [WriteALU],
2475 (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
2476 "BICrr")>;
2477def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
2478def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
2479
2480
// All three compare writes (plain, si and sr forms) alias to A9WriteALU.
2481def : SchedAlias<WriteCMP, A9WriteALU>;
2482def : SchedAlias<WriteCMPsi, A9WriteALU>;
2483def : SchedAlias<WriteCMPsr, A9WriteALU>;
2484
// Move opcodes matched by name pattern. The shifted-operand move forms
// (MOVsr/MOVsi/MVNsr/MOVCCsi/MOVCCsr) use A9WriteIsr.
2485def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
2486 "MOVCCsr")>;
2487def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
// 32-bit immediate and global-address moves use the A9WriteI2* writes; the
// pcrel and pcrel_ldr forms each get their own write.
2488def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
2489 "MOV_ga_dyn")>;
2490def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
2491def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
2492
// SEL is scheduled as WriteALU.
2493def : InstRW< [WriteALU], (instregex "SEL")>;
2494
// The bitfield opcodes BFC/BFI/UBFX/SBFX are scheduled as WriteALUsi.
2495def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
2496
// Multiply / multiply-accumulate opcodes listed with a single write, A9WriteM.
2497def : InstRW< [A9WriteM],
2498 (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
2499 "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Long multiply opcodes list two writes: A9WriteM followed by A9WriteMHi.
// "SMLAL$" is end-anchored, unlike the other patterns in this list.
2500def : InstRW< [A9WriteM, A9WriteMHi],
2501 (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
2502 "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
2503 "SMLALTT")>;
2504// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
// The pattern list pairs every base opcode with its X-suffixed variant
// (SMLAD/SMLADX, SMLALD/SMLALDX, SMLSD/SMLSDX, SMLSLD/SMLSLDX, SMUAD/SMUADX,
// SMUSD/SMUSDX). All of them use the A9WriteM + A9WriteMHi write pair.
2505def : InstRW< [A9WriteM, A9WriteMHi],
2506 (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
2507 "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
2508
// SMUL<x><y>/SMULW<y> and SMLA<x><y>/SMLAW<y> opcodes: both groups use the
// A9WriteM16 + A9WriteM16Hi write pair.
2509def : InstRW<[A9WriteM16, A9WriteM16Hi],
2510 (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
2511def : InstRW<[A9WriteM16, A9WriteM16Hi],
2512 (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
2513
// Integer load opcodes matched by name pattern. "PICLDR$" is end-anchored,
// so it does not also match the PICLDRH/PICLDRB/... names listed below.
2514def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
2515def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
2516def : InstRW<[A9WriteLb],
2517 (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
2518 "LDRH", "LDRSH", "LDRSB")>;
// NOTE(review): the pattern "LDRrs" is already mapped to A9WriteLsi above, so
// this entry repeats the same pattern with a different write. Presumably a
// byte-load pattern (e.g. "LDRBrs") was intended -- confirm before changing.
2519def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
2520
// WriteDiv is given an empty resource list and a latency of 0.
2521def : WriteRes<WriteDiv, []> { let Latency = 0; }
2522
// All three branch writes use the A9UnitB resource.
2523def : WriteRes<WriteBr, [A9UnitB]>;
2524def : WriteRes<WriteBrL, [A9UnitB]>;
2525def : WriteRes<WriteBrTbl, [A9UnitB]>;
// WritePreLd is given an empty resource list.
2526def : WriteRes<WritePreLd, []>;
// WriteCvtFP aliases to A9WriteF.
2527def : SchedAlias<WriteCvtFP, A9WriteF>;
// WriteNoop: empty resource list, latency 0, and zero micro-ops.
2528def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
223e47cc 2529} // SchedModel = CortexA9Model