]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file defines the itinerary class data for the ARM Cortex A9 processors. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | // ===---------------------------------------------------------------------===// | |
15 | // This section contains legacy support for itineraries. This is | |
16 | // required until SD and PostRA schedulers are replaced by MachineScheduler. | |
17 | ||
18 | // | |
19 | // Ad-hoc scheduling information derived from the rather vague "Cortex-A9 Technical | |
20 | // Reference Manual". | |
21 | // | |
22 | // Functional units referenced by the InstrStage lists of the Cortex-A9 itineraries below. | |
23 | def A9_Issue0 : FuncUnit; // Issue 0; the itineraries below start on either issue unit, except IIC_Br which occupies both | |
24 | def A9_Issue1 : FuncUnit; // Issue 1 | |
25 | def A9_Branch : FuncUnit; // Branch; final stage of IIC_Br, IIC_iLoad_mBr and IIC_iPop_Br | |
26 | def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0; the integer multiply itineraries use only this ALU | |
27 | def A9_ALU1 : FuncUnit; // ALU pipeline 1 | |
28 | def A9_AGU : FuncUnit; // Address generation unit for ld / st | |
29 | def A9_NPipe : FuncUnit; // NEON pipeline; the VFP itineraries below also execute here | |
30 | def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer; taken by both the load/store and the VFP itineraries | |
31 | def A9_LSUnit : FuncUnit; // L/S (load/store) unit | |
32 | def A9_DRegsVFP: FuncUnit; // FP register set, VFP side; artificial unit used to model VFP/NEON cross-domain stalls | |
33 | def A9_DRegsN : FuncUnit; // FP register set, NEON side; artificial unit used to model VFP/NEON cross-domain stalls | |
34 | ||
35 | // Bypasses. A9_LdBypass is named in the per-operand bypass lists of the load, ALU, MVN and compare itineraries below. | |
36 | def A9_LdBypass : Bypass; | |
37 | ||
38 | def CortexA9Itineraries : ProcessorItineraries< | |
39 | [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, | |
40 | A9_LSUnit, A9_DRegsVFP, A9_DRegsN], | |
41 | [A9_LdBypass], [ | |
42 | // Two fully-pipelined integer ALU pipelines | |
43 | ||
44 | // | |
45 | // Move instructions, unconditional | |
46 | InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
47 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, | |
48 | InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
49 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
50 | InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
51 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
52 | InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
53 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, | |
54 | InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
55 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
56 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, | |
57 | InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
58 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
59 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
60 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, | |
61 | InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
62 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
63 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
64 | InstrStage<1, [A9_MUX0], 0>, | |
65 | InstrStage<1, [A9_AGU], 0>, | |
66 | InstrStage<1, [A9_LSUnit]>], [5]>, | |
67 | // | |
68 | // MVN instructions | |
69 | InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
70 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
71 | [1]>, | |
72 | InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
73 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
74 | [1, 1], [NoBypass, A9_LdBypass]>, | |
75 | InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
76 | InstrStage<2, [A9_ALU0, A9_ALU1]>], | |
77 | [2, 1]>, | |
78 | InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
79 | InstrStage<3, [A9_ALU0, A9_ALU1]>], | |
80 | [3, 1, 1]>, | |
81 | // | |
82 | // No operand cycles | |
83 | InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
84 | InstrStage<1, [A9_ALU0, A9_ALU1]>]>, | |
85 | // | |
86 | // Binary Instructions that produce a result | |
87 | InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
88 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
89 | [1, 1], [NoBypass, A9_LdBypass]>, | |
90 | InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
91 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
92 | [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, | |
93 | InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
94 | InstrStage<2, [A9_ALU0, A9_ALU1]>], | |
95 | [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, | |
96 | InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
97 | InstrStage<2, [A9_ALU0, A9_ALU1]>], | |
98 | [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, | |
99 | InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
100 | InstrStage<3, [A9_ALU0, A9_ALU1]>], | |
101 | [3, 1, 1, 1], | |
102 | [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, | |
103 | // | |
104 | // Bitwise Instructions that produce a result | |
105 | InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
106 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
107 | InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
108 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, | |
109 | InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
110 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, | |
111 | InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
112 | InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, | |
113 | // | |
114 | // Unary Instructions that produce a result | |
115 | ||
116 | // CLZ, RBIT, etc. | |
117 | InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
118 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
119 | ||
120 | // BFC, BFI, UBFX, SBFX | |
121 | InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
122 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, | |
123 | ||
124 | // | |
125 | // Zero and sign extension instructions | |
126 | InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
127 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, | |
128 | InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
129 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, | |
130 | InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
131 | InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, | |
132 | // | |
133 | // Compare instructions | |
134 | InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
135 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
136 | [1], [A9_LdBypass]>, | |
137 | InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
138 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
139 | [1, 1], [A9_LdBypass, A9_LdBypass]>, | |
140 | InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
141 | InstrStage<2, [A9_ALU0, A9_ALU1]>], | |
142 | [1, 1], [A9_LdBypass, NoBypass]>, | |
143 | InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
144 | InstrStage<3, [A9_ALU0, A9_ALU1]>], | |
145 | [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, | |
146 | // | |
147 | // Test instructions | |
148 | InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
149 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, | |
150 | InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
151 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
152 | InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
153 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
154 | InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
155 | InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, | |
156 | // | |
157 | // Move instructions, conditional | |
158 | // FIXME: Correctly model the extra input dep on the destination. | |
159 | InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
160 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, | |
161 | InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
162 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
163 | InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
164 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, | |
165 | InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
166 | InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, | |
167 | InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
168 | InstrStage<1, [A9_ALU0, A9_ALU1]>, | |
169 | InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
170 | InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, | |
171 | ||
172 | // Integer multiply pipeline | |
173 | // | |
174 | InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
175 | InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, | |
176 | InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
177 | InstrStage<2, [A9_ALU0]>], | |
178 | [3, 1, 1, 1]>, | |
179 | InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
180 | InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, | |
181 | InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
182 | InstrStage<2, [A9_ALU0]>], | |
183 | [4, 1, 1, 1]>, | |
184 | InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
185 | InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, | |
186 | InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
187 | InstrStage<3, [A9_ALU0]>], | |
188 | [4, 5, 1, 1]>, | |
189 | // Integer load pipeline | |
190 | // FIXME: The timings are some rough approximations | |
191 | // | |
192 | // Immediate offset | |
193 | InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
194 | InstrStage<1, [A9_MUX0], 0>, | |
195 | InstrStage<1, [A9_AGU], 0>, | |
196 | InstrStage<1, [A9_LSUnit]>], | |
197 | [3, 1], [A9_LdBypass]>, | |
198 | InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
199 | InstrStage<1, [A9_MUX0], 0>, | |
200 | InstrStage<2, [A9_AGU], 0>, | |
201 | InstrStage<1, [A9_LSUnit]>], | |
202 | [4, 1], [A9_LdBypass]>, | |
203 | // FIXME: If address is 64-bit aligned, AGU cycles is 1. | |
204 | InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
205 | InstrStage<1, [A9_MUX0], 0>, | |
206 | InstrStage<2, [A9_AGU], 0>, | |
207 | InstrStage<1, [A9_LSUnit]>], | |
208 | [3, 3, 1], [A9_LdBypass]>, | |
209 | // | |
210 | // Register offset | |
211 | InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
212 | InstrStage<1, [A9_MUX0], 0>, | |
213 | InstrStage<1, [A9_AGU], 0>, | |
214 | InstrStage<1, [A9_LSUnit]>], | |
215 | [3, 1, 1], [A9_LdBypass]>, | |
216 | InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
217 | InstrStage<1, [A9_MUX0], 0>, | |
218 | InstrStage<2, [A9_AGU], 0>, | |
219 | InstrStage<1, [A9_LSUnit]>], | |
220 | [4, 1, 1], [A9_LdBypass]>, | |
221 | InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
222 | InstrStage<1, [A9_MUX0], 0>, | |
223 | InstrStage<2, [A9_AGU], 0>, | |
224 | InstrStage<1, [A9_LSUnit]>], | |
225 | [3, 3, 1, 1], [A9_LdBypass]>, | |
226 | // | |
227 | // Scaled register offset | |
228 | InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
229 | InstrStage<1, [A9_MUX0], 0>, | |
230 | InstrStage<1, [A9_AGU], 0>, | |
231 | InstrStage<1, [A9_LSUnit], 0>], | |
232 | [4, 1, 1], [A9_LdBypass]>, | |
233 | InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
234 | InstrStage<1, [A9_MUX0], 0>, | |
235 | InstrStage<2, [A9_AGU], 0>, | |
236 | InstrStage<1, [A9_LSUnit]>], | |
237 | [5, 1, 1], [A9_LdBypass]>, | |
238 | // | |
239 | // Immediate offset with update | |
240 | InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
241 | InstrStage<1, [A9_MUX0], 0>, | |
242 | InstrStage<1, [A9_AGU], 0>, | |
243 | InstrStage<1, [A9_LSUnit]>], | |
244 | [3, 2, 1], [A9_LdBypass]>, | |
245 | InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
246 | InstrStage<1, [A9_MUX0], 0>, | |
247 | InstrStage<2, [A9_AGU], 0>, | |
248 | InstrStage<1, [A9_LSUnit]>], | |
249 | [4, 3, 1], [A9_LdBypass]>, | |
250 | // | |
251 | // Register offset with update | |
252 | InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
253 | InstrStage<1, [A9_MUX0], 0>, | |
254 | InstrStage<1, [A9_AGU], 0>, | |
255 | InstrStage<1, [A9_LSUnit]>], | |
256 | [3, 2, 1, 1], [A9_LdBypass]>, | |
257 | InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
258 | InstrStage<1, [A9_MUX0], 0>, | |
259 | InstrStage<2, [A9_AGU], 0>, | |
260 | InstrStage<1, [A9_LSUnit]>], | |
261 | [4, 3, 1, 1], [A9_LdBypass]>, | |
262 | InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
263 | InstrStage<1, [A9_MUX0], 0>, | |
264 | InstrStage<2, [A9_AGU], 0>, | |
265 | InstrStage<1, [A9_LSUnit]>], | |
266 | [3, 3, 1, 1], [A9_LdBypass]>, | |
267 | // | |
268 | // Scaled register offset with update | |
269 | InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
270 | InstrStage<1, [A9_MUX0], 0>, | |
271 | InstrStage<1, [A9_AGU], 0>, | |
272 | InstrStage<1, [A9_LSUnit]>], | |
273 | [4, 3, 1, 1], [A9_LdBypass]>, | |
274 | InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
275 | InstrStage<1, [A9_MUX0], 0>, | |
276 | InstrStage<2, [A9_AGU], 0>, | |
277 | InstrStage<1, [A9_LSUnit]>], | |
278 | [5, 4, 1, 1], [A9_LdBypass]>, | |
279 | // | |
280 | // Load multiple, def is the 5th operand. | |
281 | // FIXME: This assumes 3 to 4 registers. | |
282 | InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
283 | InstrStage<1, [A9_MUX0], 0>, | |
284 | InstrStage<2, [A9_AGU], 1>, | |
285 | InstrStage<2, [A9_LSUnit]>], | |
286 | [1, 1, 1, 1, 3], | |
287 | [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], | |
288 | -1>, // dynamic uops | |
289 | // | |
290 | // Load multiple + update, defs are the 1st and 5th operands. | |
291 | InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
292 | InstrStage<1, [A9_MUX0], 0>, | |
293 | InstrStage<2, [A9_AGU], 1>, | |
294 | InstrStage<2, [A9_LSUnit]>], | |
295 | [2, 1, 1, 1, 3], | |
296 | [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], | |
297 | -1>, // dynamic uops | |
298 | // | |
299 | // Load multiple plus branch | |
300 | InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
301 | InstrStage<1, [A9_MUX0], 0>, | |
302 | InstrStage<1, [A9_AGU], 1>, | |
303 | InstrStage<2, [A9_LSUnit]>, | |
304 | InstrStage<1, [A9_Branch]>], | |
305 | [1, 2, 1, 1, 3], | |
306 | [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], | |
307 | -1>, // dynamic uops | |
308 | // | |
309 | // Pop, def is the 3rd operand. | |
310 | InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
311 | InstrStage<1, [A9_MUX0], 0>, | |
312 | InstrStage<2, [A9_AGU], 1>, | |
313 | InstrStage<2, [A9_LSUnit]>], | |
314 | [1, 1, 3], | |
315 | [NoBypass, NoBypass, A9_LdBypass], | |
316 | -1>, // dynamic uops | |
317 | // | |
318 | // Pop + branch, def is the 3rd operand. | |
319 | InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
320 | InstrStage<1, [A9_MUX0], 0>, | |
321 | InstrStage<2, [A9_AGU], 1>, | |
322 | InstrStage<2, [A9_LSUnit]>, | |
323 | InstrStage<1, [A9_Branch]>], | |
324 | [1, 1, 3], | |
325 | [NoBypass, NoBypass, A9_LdBypass], | |
326 | -1>, // dynamic uops | |
327 | // | |
328 | // iLoadi + iALUr for t2LDRpci_pic. | |
329 | InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
330 | InstrStage<1, [A9_MUX0], 0>, | |
331 | InstrStage<1, [A9_AGU], 0>, | |
332 | InstrStage<1, [A9_LSUnit]>, | |
333 | InstrStage<1, [A9_ALU0, A9_ALU1]>], | |
334 | [2, 1]>, | |
335 | ||
336 | // Integer store pipeline | |
337 | // | |
338 | // Immediate offset | |
339 | InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
340 | InstrStage<1, [A9_MUX0], 0>, | |
341 | InstrStage<1, [A9_AGU], 0>, | |
342 | InstrStage<1, [A9_LSUnit]>], [1, 1]>, | |
343 | InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
344 | InstrStage<1, [A9_MUX0], 0>, | |
345 | InstrStage<2, [A9_AGU], 1>, | |
346 | InstrStage<1, [A9_LSUnit]>], [1, 1]>, | |
347 | // FIXME: If address is 64-bit aligned, AGU cycles is 1. | |
348 | InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
349 | InstrStage<1, [A9_MUX0], 0>, | |
350 | InstrStage<2, [A9_AGU], 1>, | |
351 | InstrStage<1, [A9_LSUnit]>], [1, 1]>, | |
352 | // | |
353 | // Register offset | |
354 | InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
355 | InstrStage<1, [A9_MUX0], 0>, | |
356 | InstrStage<1, [A9_AGU], 0>, | |
357 | InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, | |
358 | InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
359 | InstrStage<1, [A9_MUX0], 0>, | |
360 | InstrStage<2, [A9_AGU], 1>, | |
361 | InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, | |
362 | InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
363 | InstrStage<1, [A9_MUX0], 0>, | |
364 | InstrStage<2, [A9_AGU], 1>, | |
365 | InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, | |
366 | // | |
367 | // Scaled register offset | |
368 | InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
369 | InstrStage<1, [A9_MUX0], 0>, | |
370 | InstrStage<1, [A9_AGU], 0>, | |
371 | InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, | |
372 | InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
373 | InstrStage<1, [A9_MUX0], 0>, | |
374 | InstrStage<2, [A9_AGU], 1>, | |
375 | InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, | |
376 | // | |
377 | // Immediate offset with update | |
378 | InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
379 | InstrStage<1, [A9_MUX0], 0>, | |
380 | InstrStage<1, [A9_AGU], 0>, | |
381 | InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>, | |
382 | InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
383 | InstrStage<1, [A9_MUX0], 0>, | |
384 | InstrStage<2, [A9_AGU], 1>, | |
385 | InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>, | |
386 | // | |
387 | // Register offset with update | |
388 | InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
389 | InstrStage<1, [A9_MUX0], 0>, | |
390 | InstrStage<1, [A9_AGU], 0>, | |
391 | InstrStage<1, [A9_LSUnit]>], | |
392 | [2, 1, 1, 1]>, | |
393 | InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
394 | InstrStage<1, [A9_MUX0], 0>, | |
395 | InstrStage<2, [A9_AGU], 1>, | |
396 | InstrStage<1, [A9_LSUnit]>], | |
397 | [3, 1, 1, 1]>, | |
398 | InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
399 | InstrStage<1, [A9_MUX0], 0>, | |
400 | InstrStage<2, [A9_AGU], 1>, | |
401 | InstrStage<1, [A9_LSUnit]>], | |
402 | [3, 1, 1, 1]>, | |
403 | // | |
404 | // Scaled register offset with update | |
405 | InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
406 | InstrStage<1, [A9_MUX0], 0>, | |
407 | InstrStage<1, [A9_AGU], 0>, | |
408 | InstrStage<1, [A9_LSUnit]>], | |
409 | [2, 1, 1, 1]>, | |
410 | InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
411 | InstrStage<1, [A9_MUX0], 0>, | |
412 | InstrStage<2, [A9_AGU], 1>, | |
413 | InstrStage<1, [A9_LSUnit]>], | |
414 | [3, 1, 1, 1]>, | |
415 | // | |
416 | // Store multiple | |
417 | InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
418 | InstrStage<1, [A9_MUX0], 0>, | |
419 | InstrStage<1, [A9_AGU], 0>, | |
420 | InstrStage<2, [A9_LSUnit]>], | |
421 | [], [], -1>, // dynamic uops | |
422 | // | |
423 | // Store multiple + update | |
424 | InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
425 | InstrStage<1, [A9_MUX0], 0>, | |
426 | InstrStage<1, [A9_AGU], 0>, | |
427 | InstrStage<2, [A9_LSUnit]>], | |
428 | [2], [], -1>, // dynamic uops | |
429 | // | |
430 | // Preload | |
431 | InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, | |
432 | ||
433 | // Branch | |
434 | // | |
435 | // no delay slots, so the latency of a branch is unimportant | |
436 | InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>, | |
437 | InstrStage<1, [A9_Issue1], 0>, | |
438 | InstrStage<1, [A9_Branch]>]>, | |
439 | ||
440 | // VFP and NEON share the same register file. This means that every VFP | |
441 | // instruction should wait for full completion of the preceding NEON | |
442 | // instruction and vice-versa. We model this behavior with two artificial FUs: | |
443 | // DRegsVFP and DRegsN. | |
444 | // | |
445 | // Every VFP instruction: | |
446 | // - Acquires DRegsVFP resource for 1 cycle | |
447 | // - Reserves DRegsN resource for the whole duration (including time to | |
448 | // register file writeback!). | |
449 | // Every NEON instruction does the same but with FUs swapped. | |
450 | // | |
451 | // Since the reserved FU cannot be acquired, this models precisely | |
452 | // "cross-domain" stalls. | |
453 | ||
454 | // VFP | |
455 | // Issue through integer pipeline, and execute in NEON unit. | |
456 | ||
457 | // FP Special Register to Integer Register File Move | |
458 | InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
459 | InstrStage<1, [A9_MUX0], 0>, | |
460 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
461 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
462 | InstrStage<1, [A9_NPipe]>], | |
463 | [1]>, | |
464 | // | |
465 | // Single-precision FP Unary | |
466 | InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
467 | InstrStage<1, [A9_MUX0], 0>, | |
468 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
469 | // Extra latency cycles since writeback takes 2 cycles | |
470 | InstrStage<3, [A9_DRegsN], 0, Reserved>, | |
471 | InstrStage<1, [A9_NPipe]>], | |
472 | [1, 1]>, | |
473 | // | |
474 | // Double-precision FP Unary | |
475 | InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
476 | InstrStage<1, [A9_MUX0], 0>, | |
477 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
478 | // Extra latency cycles since writeback takes 2 cycles | |
479 | InstrStage<3, [A9_DRegsN], 0, Reserved>, | |
480 | InstrStage<1, [A9_NPipe]>], | |
481 | [1, 1]>, | |
482 | ||
483 | // | |
484 | // Single-precision FP Compare | |
485 | InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
486 | InstrStage<1, [A9_MUX0], 0>, | |
487 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
488 | // Extra latency cycles since writeback takes 4 cycles | |
489 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
490 | InstrStage<1, [A9_NPipe]>], | |
491 | [1, 1]>, | |
492 | // | |
493 | // Double-precision FP Compare | |
494 | InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
495 | InstrStage<1, [A9_MUX0], 0>, | |
496 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
497 | // Extra latency cycles since writeback takes 4 cycles | |
498 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
499 | InstrStage<1, [A9_NPipe]>], | |
500 | [1, 1]>, | |
501 | // | |
502 | // Single to Double FP Convert | |
503 | InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
504 | InstrStage<1, [A9_MUX0], 0>, | |
505 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
506 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
507 | InstrStage<1, [A9_NPipe]>], | |
508 | [4, 1]>, | |
509 | // | |
510 | // Double to Single FP Convert | |
511 | InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
512 | InstrStage<1, [A9_MUX0], 0>, | |
513 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
514 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
515 | InstrStage<1, [A9_NPipe]>], | |
516 | [4, 1]>, | |
517 | ||
518 | // | |
519 | // Single to Half FP Convert | |
520 | InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
521 | InstrStage<1, [A9_MUX0], 0>, | |
522 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
523 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
524 | InstrStage<1, [A9_NPipe]>], | |
525 | [4, 1]>, | |
526 | // | |
527 | // Half to Single FP Convert | |
528 | InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
529 | InstrStage<1, [A9_MUX0], 0>, | |
530 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
531 | InstrStage<3, [A9_DRegsN], 0, Reserved>, | |
532 | InstrStage<1, [A9_NPipe]>], | |
533 | [2, 1]>, | |
534 | ||
535 | // | |
536 | // Single-Precision FP to Integer Convert | |
537 | InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
538 | InstrStage<1, [A9_MUX0], 0>, | |
539 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
540 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
541 | InstrStage<1, [A9_NPipe]>], | |
542 | [4, 1]>, | |
543 | // | |
544 | // Double-Precision FP to Integer Convert | |
545 | InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
546 | InstrStage<1, [A9_MUX0], 0>, | |
547 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
548 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
549 | InstrStage<1, [A9_NPipe]>], | |
550 | [4, 1]>, | |
551 | // | |
552 | // Integer to Single-Precision FP Convert | |
553 | InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
554 | InstrStage<1, [A9_MUX0], 0>, | |
555 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
556 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
557 | InstrStage<1, [A9_NPipe]>], | |
558 | [4, 1]>, | |
559 | // | |
560 | // Integer to Double-Precision FP Convert | |
561 | InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
562 | InstrStage<1, [A9_MUX0], 0>, | |
563 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
564 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
565 | InstrStage<1, [A9_NPipe]>], | |
566 | [4, 1]>, | |
567 | // | |
568 | // Single-precision FP ALU | |
569 | InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
570 | InstrStage<1, [A9_MUX0], 0>, | |
571 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
572 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
573 | InstrStage<1, [A9_NPipe]>], | |
574 | [4, 1, 1]>, | |
575 | // | |
576 | // Double-precision FP ALU | |
577 | InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
578 | InstrStage<1, [A9_MUX0], 0>, | |
579 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
580 | InstrStage<5, [A9_DRegsN], 0, Reserved>, | |
581 | InstrStage<1, [A9_NPipe]>], | |
582 | [4, 1, 1]>, | |
583 | // | |
584 | // Single-precision FP Multiply | |
585 | InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
586 | InstrStage<1, [A9_MUX0], 0>, | |
587 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
588 | InstrStage<6, [A9_DRegsN], 0, Reserved>, | |
589 | InstrStage<1, [A9_NPipe]>], | |
590 | [5, 1, 1]>, | |
591 | // | |
592 | // Double-precision FP Multiply | |
593 | InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
594 | InstrStage<1, [A9_MUX0], 0>, | |
595 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
596 | InstrStage<7, [A9_DRegsN], 0, Reserved>, | |
597 | InstrStage<2, [A9_NPipe]>], | |
598 | [6, 1, 1]>, | |
599 | // | |
600 | // Single-precision FP MAC | |
601 | InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
602 | InstrStage<1, [A9_MUX0], 0>, | |
603 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
604 | InstrStage<9, [A9_DRegsN], 0, Reserved>, | |
605 | InstrStage<1, [A9_NPipe]>], | |
606 | [8, 1, 1, 1]>, | |
607 | // | |
608 | // Double-precision FP MAC | |
609 | InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
610 | InstrStage<1, [A9_MUX0], 0>, | |
611 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
612 | InstrStage<10, [A9_DRegsN], 0, Reserved>, | |
613 | InstrStage<2, [A9_NPipe]>], | |
614 | [9, 1, 1, 1]>, | |
615 | // | |
616 | // Single-precision Fused FP MAC | |
617 | InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
618 | InstrStage<1, [A9_MUX0], 0>, | |
619 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
620 | InstrStage<9, [A9_DRegsN], 0, Reserved>, | |
621 | InstrStage<1, [A9_NPipe]>], | |
622 | [8, 1, 1, 1]>, | |
623 | // | |
624 | // Double-precision Fused FP MAC | |
625 | InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
626 | InstrStage<1, [A9_MUX0], 0>, | |
627 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
628 | InstrStage<10, [A9_DRegsN], 0, Reserved>, | |
629 | InstrStage<2, [A9_NPipe]>], | |
630 | [9, 1, 1, 1]>, | |
631 | // | |
632 | // Single-precision FP DIV | |
633 | InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
634 | InstrStage<1, [A9_MUX0], 0>, | |
635 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
636 | InstrStage<16, [A9_DRegsN], 0, Reserved>, | |
637 | InstrStage<10, [A9_NPipe]>], | |
638 | [15, 1, 1]>, | |
639 | // | |
640 | // Double-precision FP DIV | |
641 | InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
642 | InstrStage<1, [A9_MUX0], 0>, | |
643 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
644 | InstrStage<26, [A9_DRegsN], 0, Reserved>, | |
645 | InstrStage<20, [A9_NPipe]>], | |
646 | [25, 1, 1]>, | |
647 | // | |
648 | // Single-precision FP SQRT | |
649 | InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
650 | InstrStage<1, [A9_MUX0], 0>, | |
651 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
652 | InstrStage<18, [A9_DRegsN], 0, Reserved>, | |
653 | InstrStage<13, [A9_NPipe]>], | |
654 | [17, 1]>, | |
655 | // | |
656 | // Double-precision FP SQRT | |
657 | InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
658 | InstrStage<1, [A9_MUX0], 0>, | |
659 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
660 | InstrStage<33, [A9_DRegsN], 0, Reserved>, | |
661 | InstrStage<28, [A9_NPipe]>], | |
662 | [32, 1]>, | |
663 | ||
664 | // | |
665 | // Integer to Single-precision Move | |
666 | InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
667 | InstrStage<1, [A9_MUX0], 0>, | |
668 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
669 | // 1 extra latency cycle since writeback takes 2 cycles | |
670 | InstrStage<3, [A9_DRegsN], 0, Reserved>, | |
671 | InstrStage<1, [A9_NPipe]>], | |
672 | [1, 1]>, | |
673 | // | |
674 | // Integer to Double-precision Move | |
675 | InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
676 | InstrStage<1, [A9_MUX0], 0>, | |
677 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
678 | // 1 extra latency cycle since writeback takes 2 cycles | |
679 | InstrStage<3, [A9_DRegsN], 0, Reserved>, | |
680 | InstrStage<1, [A9_NPipe]>], | |
681 | [1, 1, 1]>, | |
682 | // | |
683 | // Single-precision to Integer Move | |
684 | // | |
685 | // On A9 move-from-VFP is free to issue with no stall if other VFP | |
686 | // operations are in flight. I assume it still can't dual-issue though. | |
687 | InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
688 | InstrStage<1, [A9_MUX0], 0>], | |
689 | [2, 1]>, | |
690 | // | |
691 | // Double-precision to Integer Move | |
692 | // | |
693 | // On A9 move-from-VFP is free to issue with no stall if other VFP | |
694 | // operations are in flight. I assume it still can't dual-issue though. | |
695 | InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
696 | InstrStage<1, [A9_MUX0], 0>], | |
697 | [2, 1, 1]>, | |
698 | // | |
699 | // Single-precision FP Load | |
700 | InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
701 | InstrStage<1, [A9_MUX0], 0>, | |
702 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
703 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
704 | InstrStage<1, [A9_NPipe], 0>, | |
705 | InstrStage<1, [A9_LSUnit]>], | |
706 | [1, 1]>, | |
707 | // | |
708 | // Double-precision FP Load | |
709 | // FIXME: Result latency is 1 if address is 64-bit aligned. | |
710 | InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
711 | InstrStage<1, [A9_MUX0], 0>, | |
712 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
713 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
714 | InstrStage<1, [A9_NPipe], 0>, | |
715 | InstrStage<1, [A9_LSUnit]>], | |
716 | [2, 1]>, | |
717 | // | |
718 | // FP Load Multiple | |
719 | // FIXME: assumes 2 doubles which requires 2 LS cycles. | |
720 | InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
721 | InstrStage<1, [A9_MUX0], 0>, | |
722 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
723 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
724 | InstrStage<1, [A9_NPipe], 0>, | |
725 | InstrStage<2, [A9_LSUnit]>], | |
726 | [1, 1, 1, 1], [], -1>, // dynamic uops | |
727 | // | |
728 | // FP Load Multiple + update | |
729 | // FIXME: assumes 2 doubles which requires 2 LS cycles. | |
730 | InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
731 | InstrStage<1, [A9_MUX0], 0>, | |
732 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
733 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
734 | InstrStage<1, [A9_NPipe], 0>, | |
735 | InstrStage<2, [A9_LSUnit]>], | |
736 | [2, 1, 1, 1], [], -1>, // dynamic uops | |
737 | // | |
738 | // Single-precision FP Store | |
739 | InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
740 | InstrStage<1, [A9_MUX0], 0>, | |
741 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
742 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
743 | InstrStage<1, [A9_NPipe], 0>, | |
744 | InstrStage<1, [A9_LSUnit]>], | |
745 | [1, 1]>, | |
746 | // | |
747 | // Double-precision FP Store | |
748 | InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
749 | InstrStage<1, [A9_MUX0], 0>, | |
750 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
751 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
752 | InstrStage<1, [A9_NPipe], 0>, | |
753 | InstrStage<1, [A9_LSUnit]>], | |
754 | [1, 1]>, | |
755 | // | |
756 | // FP Store Multiple | |
757 | // FIXME: assumes 2 doubles which requires 2 LS cycles. | |
758 | InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
759 | InstrStage<1, [A9_MUX0], 0>, | |
760 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
761 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
762 | InstrStage<1, [A9_NPipe], 0>, | |
763 | InstrStage<2, [A9_LSUnit]>], | |
764 | [1, 1, 1, 1], [], -1>, // dynamic uops | |
765 | // | |
766 | // FP Store Multiple + update | |
767 | // FIXME: assumes 2 doubles which requires 2 LS cycles. | |
768 | InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
769 | InstrStage<1, [A9_MUX0], 0>, | |
770 | InstrStage<1, [A9_DRegsVFP], 0, Required>, | |
771 | InstrStage<2, [A9_DRegsN], 0, Reserved>, | |
772 | InstrStage<1, [A9_NPipe], 0>, | |
773 | InstrStage<2, [A9_LSUnit]>], | |
774 | [2, 1, 1, 1], [], -1>, // dynamic uops | |
775 | // NEON | |
776 | // VLD1 | |
777 | InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
778 | InstrStage<1, [A9_MUX0], 0>, | |
779 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
780 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
781 | InstrStage<1, [A9_NPipe], 0>, | |
782 | InstrStage<1, [A9_LSUnit]>], | |
783 | [1, 1]>, | |
784 | // VLD1x2 | |
785 | InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
786 | InstrStage<1, [A9_MUX0], 0>, | |
787 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
788 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
789 | InstrStage<1, [A9_NPipe], 0>, | |
790 | InstrStage<1, [A9_LSUnit]>], | |
791 | [1, 1, 1]>, | |
792 | // VLD1x3 | |
793 | InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
794 | InstrStage<1, [A9_MUX0], 0>, | |
795 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
796 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
797 | InstrStage<2, [A9_NPipe], 0>, | |
798 | InstrStage<2, [A9_LSUnit]>], | |
799 | [1, 1, 2, 1]>, | |
800 | // VLD1x4 | |
801 | InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
802 | InstrStage<1, [A9_MUX0], 0>, | |
803 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
804 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
805 | InstrStage<2, [A9_NPipe], 0>, | |
806 | InstrStage<2, [A9_LSUnit]>], | |
807 | [1, 1, 2, 2, 1]>, | |
808 | // VLD1u | |
809 | InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
810 | InstrStage<1, [A9_MUX0], 0>, | |
811 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
812 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
813 | InstrStage<1, [A9_NPipe], 0>, | |
814 | InstrStage<1, [A9_LSUnit]>], | |
815 | [1, 2, 1]>, | |
816 | // VLD1x2u | |
817 | InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
818 | InstrStage<1, [A9_MUX0], 0>, | |
819 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
820 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
821 | InstrStage<1, [A9_NPipe], 0>, | |
822 | InstrStage<1, [A9_LSUnit]>], | |
823 | [1, 1, 2, 1]>, | |
824 | // VLD1x3u | |
825 | InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
826 | InstrStage<1, [A9_MUX0], 0>, | |
827 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
828 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
829 | InstrStage<2, [A9_NPipe], 0>, | |
830 | InstrStage<2, [A9_LSUnit]>], | |
831 | [1, 1, 2, 2, 1]>, | |
832 | // VLD1x4u | |
833 | InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
834 | InstrStage<1, [A9_MUX0], 0>, | |
835 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
836 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
837 | InstrStage<2, [A9_NPipe], 0>, | |
838 | InstrStage<2, [A9_LSUnit]>], | |
839 | [1, 1, 2, 2, 2, 1]>, | |
840 | // | |
841 | // VLD1ln | |
842 | InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
843 | InstrStage<1, [A9_MUX0], 0>, | |
844 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
845 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
846 | InstrStage<2, [A9_NPipe], 0>, | |
847 | InstrStage<2, [A9_LSUnit]>], | |
848 | [3, 1, 1, 1]>, | |
849 | // | |
850 | // VLD1lnu | |
851 | InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
852 | InstrStage<1, [A9_MUX0], 0>, | |
853 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
854 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
855 | InstrStage<2, [A9_NPipe], 0>, | |
856 | InstrStage<2, [A9_LSUnit]>], | |
857 | [3, 2, 1, 1, 1, 1]>, | |
858 | // | |
859 | // VLD1dup | |
860 | InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
861 | InstrStage<1, [A9_MUX0], 0>, | |
862 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
863 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
864 | InstrStage<1, [A9_NPipe], 0>, | |
865 | InstrStage<1, [A9_LSUnit]>], | |
866 | [2, 1]>, | |
867 | // | |
868 | // VLD1dupu | |
869 | InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
870 | InstrStage<1, [A9_MUX0], 0>, | |
871 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
872 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
873 | InstrStage<1, [A9_NPipe], 0>, | |
874 | InstrStage<1, [A9_LSUnit]>], | |
875 | [2, 2, 1, 1]>, | |
876 | // | |
877 | // VLD2 | |
878 | InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
879 | InstrStage<1, [A9_MUX0], 0>, | |
880 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
881 | // Extra latency cycles since wbck is 7 cycles | |
882 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
883 | InstrStage<1, [A9_NPipe], 0>, | |
884 | InstrStage<1, [A9_LSUnit]>], | |
885 | [2, 2, 1]>, | |
886 | // | |
887 | // VLD2x2 | |
888 | InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
889 | InstrStage<1, [A9_MUX0], 0>, | |
890 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
891 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
892 | InstrStage<2, [A9_NPipe], 0>, | |
893 | InstrStage<2, [A9_LSUnit]>], | |
894 | [2, 3, 2, 3, 1]>, | |
895 | // | |
896 | // VLD2ln | |
897 | InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
898 | InstrStage<1, [A9_MUX0], 0>, | |
899 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
900 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
901 | InstrStage<2, [A9_NPipe], 0>, | |
902 | InstrStage<2, [A9_LSUnit]>], | |
903 | [3, 3, 1, 1, 1, 1]>, | |
904 | // | |
905 | // VLD2u | |
906 | InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
907 | InstrStage<1, [A9_MUX0], 0>, | |
908 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
909 | // Extra latency cycles since wbck is 7 cycles | |
910 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
911 | InstrStage<1, [A9_NPipe], 0>, | |
912 | InstrStage<1, [A9_LSUnit]>], | |
913 | [2, 2, 2, 1, 1, 1]>, | |
914 | // | |
915 | // VLD2x2u | |
916 | InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
917 | InstrStage<1, [A9_MUX0], 0>, | |
918 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
919 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
920 | InstrStage<2, [A9_NPipe], 0>, | |
921 | InstrStage<2, [A9_LSUnit]>], | |
922 | [2, 3, 2, 3, 2, 1]>, | |
923 | // | |
924 | // VLD2lnu | |
925 | InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
926 | InstrStage<1, [A9_MUX0], 0>, | |
927 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
928 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
929 | InstrStage<2, [A9_NPipe], 0>, | |
930 | InstrStage<2, [A9_LSUnit]>], | |
931 | [3, 3, 2, 1, 1, 1, 1, 1]>, | |
932 | // | |
933 | // VLD2dup | |
934 | InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
935 | InstrStage<1, [A9_MUX0], 0>, | |
936 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
937 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
938 | InstrStage<1, [A9_NPipe], 0>, | |
939 | InstrStage<1, [A9_LSUnit]>], | |
940 | [2, 2, 1]>, | |
941 | // | |
942 | // VLD2dupu | |
943 | InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
944 | InstrStage<1, [A9_MUX0], 0>, | |
945 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
946 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
947 | InstrStage<1, [A9_NPipe], 0>, | |
948 | InstrStage<1, [A9_LSUnit]>], | |
949 | [2, 2, 2, 1, 1]>, | |
950 | // | |
951 | // VLD3 | |
952 | InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
953 | InstrStage<1, [A9_MUX0], 0>, | |
954 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
955 | InstrStage<9,[A9_DRegsVFP], 0, Reserved>, | |
956 | InstrStage<3, [A9_NPipe], 0>, | |
957 | InstrStage<3, [A9_LSUnit]>], | |
958 | [3, 3, 4, 1]>, | |
959 | // | |
960 | // VLD3ln | |
961 | InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
962 | InstrStage<1, [A9_MUX0], 0>, | |
963 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
964 | InstrStage<11,[A9_DRegsVFP], 0, Reserved>, | |
965 | InstrStage<5, [A9_NPipe], 0>, | |
966 | InstrStage<5, [A9_LSUnit]>], | |
967 | [5, 5, 6, 1, 1, 1, 1, 2]>, | |
968 | // | |
969 | // VLD3u | |
970 | InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
971 | InstrStage<1, [A9_MUX0], 0>, | |
972 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
973 | InstrStage<9,[A9_DRegsVFP], 0, Reserved>, | |
974 | InstrStage<3, [A9_NPipe], 0>, | |
975 | InstrStage<3, [A9_LSUnit]>], | |
976 | [3, 3, 4, 2, 1]>, | |
977 | // | |
978 | // VLD3lnu | |
979 | InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
980 | InstrStage<1, [A9_MUX0], 0>, | |
981 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
982 | InstrStage<11,[A9_DRegsVFP], 0, Reserved>, | |
983 | InstrStage<5, [A9_NPipe], 0>, | |
984 | InstrStage<5, [A9_LSUnit]>], | |
985 | [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, | |
986 | // | |
987 | // VLD3dup | |
988 | InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
989 | InstrStage<1, [A9_MUX0], 0>, | |
990 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
991 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
992 | InstrStage<3, [A9_NPipe], 0>, | |
993 | InstrStage<3, [A9_LSUnit]>], | |
994 | [3, 3, 4, 1]>, | |
995 | // | |
996 | // VLD3dupu | |
997 | InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
998 | InstrStage<1, [A9_MUX0], 0>, | |
999 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1000 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1001 | InstrStage<3, [A9_NPipe], 0>, | |
1002 | InstrStage<3, [A9_LSUnit]>], | |
1003 | [3, 3, 4, 2, 1, 1]>, | |
1004 | // | |
1005 | // VLD4 | |
1006 | InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1007 | InstrStage<1, [A9_MUX0], 0>, | |
1008 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1009 | InstrStage<9,[A9_DRegsVFP], 0, Reserved>, | |
1010 | InstrStage<3, [A9_NPipe], 0>, | |
1011 | InstrStage<3, [A9_LSUnit]>], | |
1012 | [3, 3, 4, 4, 1]>, | |
1013 | // | |
1014 | // VLD4ln | |
1015 | InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1016 | InstrStage<1, [A9_MUX0], 0>, | |
1017 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1018 | InstrStage<10,[A9_DRegsVFP], 0, Reserved>, | |
1019 | InstrStage<4, [A9_NPipe], 0>, | |
1020 | InstrStage<4, [A9_LSUnit]>], | |
1021 | [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, | |
1022 | // | |
1023 | // VLD4u | |
1024 | InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1025 | InstrStage<1, [A9_MUX0], 0>, | |
1026 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1027 | InstrStage<9,[A9_DRegsVFP], 0, Reserved>, | |
1028 | InstrStage<3, [A9_NPipe], 0>, | |
1029 | InstrStage<3, [A9_LSUnit]>], | |
1030 | [3, 3, 4, 4, 2, 1]>, | |
1031 | // | |
1032 | // VLD4lnu | |
1033 | InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1034 | InstrStage<1, [A9_MUX0], 0>, | |
1035 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1036 | InstrStage<10,[A9_DRegsVFP], 0, Reserved>, | |
1037 | InstrStage<4, [A9_NPipe], 0>, | |
1038 | InstrStage<4, [A9_LSUnit]>], | |
1039 | [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, | |
1040 | // | |
1041 | // VLD4dup | |
1042 | InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1043 | InstrStage<1, [A9_MUX0], 0>, | |
1044 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1045 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1046 | InstrStage<2, [A9_NPipe], 0>, | |
1047 | InstrStage<2, [A9_LSUnit]>], | |
1048 | [2, 2, 3, 3, 1]>, | |
1049 | // | |
1050 | // VLD4dupu | |
1051 | InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1052 | InstrStage<1, [A9_MUX0], 0>, | |
1053 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1054 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1055 | InstrStage<2, [A9_NPipe], 0>, | |
1056 | InstrStage<2, [A9_LSUnit]>], | |
1057 | [2, 2, 3, 3, 2, 1, 1]>, | |
1058 | // | |
1059 | // VST1 | |
1060 | InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1061 | InstrStage<1, [A9_MUX0], 0>, | |
1062 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1063 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1064 | InstrStage<1, [A9_NPipe], 0>, | |
1065 | InstrStage<1, [A9_LSUnit]>], | |
1066 | [1, 1, 1]>, | |
1067 | // | |
1068 | // VST1x2 | |
1069 | InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1070 | InstrStage<1, [A9_MUX0], 0>, | |
1071 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1072 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1073 | InstrStage<1, [A9_NPipe], 0>, | |
1074 | InstrStage<1, [A9_LSUnit]>], | |
1075 | [1, 1, 1, 1]>, | |
1076 | // | |
1077 | // VST1x3 | |
1078 | InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1079 | InstrStage<1, [A9_MUX0], 0>, | |
1080 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1081 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1082 | InstrStage<2, [A9_NPipe], 0>, | |
1083 | InstrStage<2, [A9_LSUnit]>], | |
1084 | [1, 1, 1, 1, 2]>, | |
1085 | // | |
1086 | // VST1x4 | |
1087 | InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1088 | InstrStage<1, [A9_MUX0], 0>, | |
1089 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1090 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1091 | InstrStage<2, [A9_NPipe], 0>, | |
1092 | InstrStage<2, [A9_LSUnit]>], | |
1093 | [1, 1, 1, 1, 2, 2]>, | |
1094 | // | |
1095 | // VST1u | |
1096 | InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1097 | InstrStage<1, [A9_MUX0], 0>, | |
1098 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1099 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1100 | InstrStage<1, [A9_NPipe], 0>, | |
1101 | InstrStage<1, [A9_LSUnit]>], | |
1102 | [2, 1, 1, 1, 1]>, | |
1103 | // | |
1104 | // VST1x2u | |
1105 | InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1106 | InstrStage<1, [A9_MUX0], 0>, | |
1107 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1108 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1109 | InstrStage<1, [A9_NPipe], 0>, | |
1110 | InstrStage<1, [A9_LSUnit]>], | |
1111 | [2, 1, 1, 1, 1, 1]>, | |
1112 | // | |
1113 | // VST1x3u | |
1114 | InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1115 | InstrStage<1, [A9_MUX0], 0>, | |
1116 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1117 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1118 | InstrStage<2, [A9_NPipe], 0>, | |
1119 | InstrStage<2, [A9_LSUnit]>], | |
1120 | [2, 1, 1, 1, 1, 1, 2]>, | |
1121 | // | |
1122 | // VST1x4u | |
1123 | InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1124 | InstrStage<1, [A9_MUX0], 0>, | |
1125 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1126 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1127 | InstrStage<2, [A9_NPipe], 0>, | |
1128 | InstrStage<2, [A9_LSUnit]>], | |
1129 | [2, 1, 1, 1, 1, 1, 2, 2]>, | |
1130 | // | |
1131 | // VST1ln | |
1132 | InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1133 | InstrStage<1, [A9_MUX0], 0>, | |
1134 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1135 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1136 | InstrStage<1, [A9_NPipe], 0>, | |
1137 | InstrStage<1, [A9_LSUnit]>], | |
1138 | [1, 1, 1]>, | |
1139 | // | |
1140 | // VST1lnu | |
1141 | InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1142 | InstrStage<1, [A9_MUX0], 0>, | |
1143 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1144 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1145 | InstrStage<1, [A9_NPipe], 0>, | |
1146 | InstrStage<1, [A9_LSUnit]>], | |
1147 | [2, 1, 1, 1, 1]>, | |
1148 | // | |
1149 | // VST2 | |
1150 | InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1151 | InstrStage<1, [A9_MUX0], 0>, | |
1152 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1153 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1154 | InstrStage<1, [A9_NPipe], 0>, | |
1155 | InstrStage<1, [A9_LSUnit]>], | |
1156 | [1, 1, 1, 1]>, | |
1157 | // | |
1158 | // VST2x2 | |
1159 | InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1160 | InstrStage<1, [A9_MUX0], 0>, | |
1161 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1162 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1163 | InstrStage<3, [A9_NPipe], 0>, | |
1164 | InstrStage<3, [A9_LSUnit]>], | |
1165 | [1, 1, 1, 1, 2, 2]>, | |
1166 | // | |
1167 | // VST2u | |
1168 | InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1169 | InstrStage<1, [A9_MUX0], 0>, | |
1170 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1171 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1172 | InstrStage<1, [A9_NPipe], 0>, | |
1173 | InstrStage<1, [A9_LSUnit]>], | |
1174 | [2, 1, 1, 1, 1, 1]>, | |
1175 | // | |
1176 | // VST2x2u | |
1177 | InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1178 | InstrStage<1, [A9_MUX0], 0>, | |
1179 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1180 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1181 | InstrStage<3, [A9_NPipe], 0>, | |
1182 | InstrStage<3, [A9_LSUnit]>], | |
1183 | [2, 1, 1, 1, 1, 1, 2, 2]>, | |
1184 | // | |
1185 | // VST2ln | |
1186 | InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1187 | InstrStage<1, [A9_MUX0], 0>, | |
1188 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1189 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1190 | InstrStage<1, [A9_NPipe], 0>, | |
1191 | InstrStage<1, [A9_LSUnit]>], | |
1192 | [1, 1, 1, 1]>, | |
1193 | // | |
1194 | // VST2lnu | |
1195 | InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1196 | InstrStage<1, [A9_MUX0], 0>, | |
1197 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1198 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1199 | InstrStage<1, [A9_NPipe], 0>, | |
1200 | InstrStage<1, [A9_LSUnit]>], | |
1201 | [2, 1, 1, 1, 1, 1]>, | |
1202 | // | |
1203 | // VST3 | |
1204 | InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1205 | InstrStage<1, [A9_MUX0], 0>, | |
1206 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1207 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1208 | InstrStage<2, [A9_NPipe], 0>, | |
1209 | InstrStage<2, [A9_LSUnit]>], | |
1210 | [1, 1, 1, 1, 2]>, | |
1211 | // | |
1212 | // VST3u | |
1213 | InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1214 | InstrStage<1, [A9_MUX0], 0>, | |
1215 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1216 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1217 | InstrStage<2, [A9_NPipe], 0>, | |
1218 | InstrStage<2, [A9_LSUnit]>], | |
1219 | [2, 1, 1, 1, 1, 1, 2]>, | |
1220 | // | |
1221 | // VST3ln | |
1222 | InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1223 | InstrStage<1, [A9_MUX0], 0>, | |
1224 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1225 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1226 | InstrStage<3, [A9_NPipe], 0>, | |
1227 | InstrStage<3, [A9_LSUnit]>], | |
1228 | [1, 1, 1, 1, 2]>, | |
1229 | // | |
1230 | // VST3lnu | |
1231 | InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1232 | InstrStage<1, [A9_MUX0], 0>, | |
1233 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1234 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1235 | InstrStage<3, [A9_NPipe], 0>, | |
1236 | InstrStage<3, [A9_LSUnit]>], | |
1237 | [2, 1, 1, 1, 1, 1, 2]>, | |
1238 | // | |
1239 | // VST4 | |
1240 | InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1241 | InstrStage<1, [A9_MUX0], 0>, | |
1242 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1243 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1244 | InstrStage<2, [A9_NPipe], 0>, | |
1245 | InstrStage<2, [A9_LSUnit]>], | |
1246 | [1, 1, 1, 1, 2, 2]>, | |
1247 | // | |
1248 | // VST4u | |
1249 | InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1250 | InstrStage<1, [A9_MUX0], 0>, | |
1251 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1252 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1253 | InstrStage<2, [A9_NPipe], 0>, | |
1254 | InstrStage<2, [A9_LSUnit]>], | |
1255 | [2, 1, 1, 1, 1, 1, 2, 2]>, | |
1256 | // | |
1257 | // VST4ln | |
1258 | InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1259 | InstrStage<1, [A9_MUX0], 0>, | |
1260 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1261 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1262 | InstrStage<2, [A9_NPipe], 0>, | |
1263 | InstrStage<2, [A9_LSUnit]>], | |
1264 | [1, 1, 1, 1, 2, 2]>, | |
1265 | // | |
1266 | // VST4lnu | |
1267 | InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1268 | InstrStage<1, [A9_MUX0], 0>, | |
1269 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1270 | InstrStage<2, [A9_DRegsVFP], 0, Reserved>, | |
1271 | InstrStage<2, [A9_NPipe], 0>, | |
1272 | InstrStage<2, [A9_LSUnit]>], | |
1273 | [2, 1, 1, 1, 1, 1, 2, 2]>, | |
1274 | ||
1275 | // | |
1276 | // Double-register Integer Unary | |
1277 | InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1278 | InstrStage<1, [A9_MUX0], 0>, | |
1279 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1280 | // Extra latency cycles since wbck is 6 cycles | |
1281 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1282 | InstrStage<1, [A9_NPipe]>], | |
1283 | [4, 2]>, | |
1284 | // | |
1285 | // Quad-register Integer Unary | |
1286 | InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1287 | InstrStage<1, [A9_MUX0], 0>, | |
1288 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1289 | // Extra latency cycles since wbck is 6 cycles | |
1290 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1291 | InstrStage<1, [A9_NPipe]>], | |
1292 | [4, 2]>, | |
1293 | // | |
1294 | // Double-register Integer Q-Unary | |
1295 | InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1296 | InstrStage<1, [A9_MUX0], 0>, | |
1297 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1298 | // Extra latency cycles since wbck is 6 cycles | |
1299 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1300 | InstrStage<1, [A9_NPipe]>], | |
1301 | [4, 1]>, | |
1302 | // | |
1303 | // Quad-register Integer Q-Unary | |
1304 | InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1305 | InstrStage<1, [A9_MUX0], 0>, | |
1306 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1307 | // Extra latency cycles since wbck is 6 cycles | |
1308 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1309 | InstrStage<1, [A9_NPipe]>], | |
1310 | [4, 1]>, | |
1311 | // | |
1312 | // Double-register Integer Binary | |
1313 | InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1314 | InstrStage<1, [A9_MUX0], 0>, | |
1315 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1316 | // Extra latency cycles since wbck is 6 cycles | |
1317 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1318 | InstrStage<1, [A9_NPipe]>], | |
1319 | [3, 2, 2]>, | |
1320 | // | |
1321 | // Quad-register Integer Binary | |
1322 | InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1323 | InstrStage<1, [A9_MUX0], 0>, | |
1324 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1325 | // Extra latency cycles since wbck is 6 cycles | |
1326 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1327 | InstrStage<1, [A9_NPipe]>], | |
1328 | [3, 2, 2]>, | |
1329 | // | |
1330 | // Double-register Integer Subtract | |
1331 | InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1332 | InstrStage<1, [A9_MUX0], 0>, | |
1333 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1334 | // Extra latency cycles since wbck is 6 cycles | |
1335 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1336 | InstrStage<1, [A9_NPipe]>], | |
1337 | [3, 2, 1]>, | |
1338 | // | |
1339 | // Quad-register Integer Subtract | |
1340 | InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1341 | InstrStage<1, [A9_MUX0], 0>, | |
1342 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1343 | // Extra latency cycles since wbck is 6 cycles | |
1344 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1345 | InstrStage<1, [A9_NPipe]>], | |
1346 | [3, 2, 1]>, | |
1347 | // | |
1348 | // Double-register Integer Shift | |
1349 | InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1350 | InstrStage<1, [A9_MUX0], 0>, | |
1351 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1352 | // Extra latency cycles since wbck is 6 cycles | |
1353 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1354 | InstrStage<1, [A9_NPipe]>], | |
1355 | [3, 1, 1]>, | |
1356 | // | |
1357 | // Quad-register Integer Shift | |
1358 | InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1359 | InstrStage<1, [A9_MUX0], 0>, | |
1360 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1361 | // Extra latency cycles since wbck is 6 cycles | |
1362 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1363 | InstrStage<1, [A9_NPipe]>], | |
1364 | [3, 1, 1]>, | |
1365 | // | |
1366 | // Double-register Integer Shift (4 cycle) | |
1367 | InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1368 | InstrStage<1, [A9_MUX0], 0>, | |
1369 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1370 | // Extra latency cycles since wbck is 6 cycles | |
1371 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1372 | InstrStage<1, [A9_NPipe]>], | |
1373 | [4, 1, 1]>, | |
1374 | // | |
1375 | // Quad-register Integer Shift (4 cycle) | |
1376 | InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1377 | InstrStage<1, [A9_MUX0], 0>, | |
1378 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1379 | // Extra latency cycles since wbck is 6 cycles | |
1380 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1381 | InstrStage<1, [A9_NPipe]>], | |
1382 | [4, 1, 1]>, | |
1383 | // | |
1384 | // Double-register Integer Binary (4 cycle) | |
1385 | InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1386 | InstrStage<1, [A9_MUX0], 0>, | |
1387 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1388 | // Extra latency cycles since wbck is 6 cycles | |
1389 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1390 | InstrStage<1, [A9_NPipe]>], | |
1391 | [4, 2, 2]>, | |
1392 | // | |
1393 | // Quad-register Integer Binary (4 cycle) | |
1394 | InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1395 | InstrStage<1, [A9_MUX0], 0>, | |
1396 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1397 | // Extra latency cycles since wbck is 6 cycles | |
1398 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1399 | InstrStage<1, [A9_NPipe]>], | |
1400 | [4, 2, 2]>, | |
1401 | // | |
1402 | // Double-register Integer Subtract (4 cycle) | |
1403 | InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1404 | InstrStage<1, [A9_MUX0], 0>, | |
1405 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1406 | // Extra latency cycles since wbck is 6 cycles | |
1407 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1408 | InstrStage<1, [A9_NPipe]>], | |
1409 | [4, 2, 1]>, | |
1410 | // | |
1411 | // Quad-register Integer Subtract (4 cycle) | |
1412 | InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1413 | InstrStage<1, [A9_MUX0], 0>, | |
1414 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1415 | // Extra latency cycles since wbck is 6 cycles | |
1416 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1417 | InstrStage<1, [A9_NPipe]>], | |
1418 | [4, 2, 1]>, | |
1419 | ||
1420 | // | |
1421 | // Double-register Integer Count | |
1422 | InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1423 | InstrStage<1, [A9_MUX0], 0>, | |
1424 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1425 | // Extra latency cycles since wbck is 6 cycles | |
1426 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1427 | InstrStage<1, [A9_NPipe]>], | |
1428 | [3, 2, 2]>, | |
1429 | // | |
1430 | // Quad-register Integer Count | |
1431 | // Result written in N3, but that is relative to the last cycle of a | |
1432 | // multicycle instruction, so we use 4 for those cases | |
1433 | InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1434 | InstrStage<1, [A9_MUX0], 0>, | |
1435 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1436 | // Extra latency cycles since wbck is 7 cycles | |
1437 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1438 | InstrStage<2, [A9_NPipe]>], | |
1439 | [4, 2, 2]>, | |
1440 | // | |
1441 | // Double-register Absolute Difference and Accumulate | |
1442 | InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1443 | InstrStage<1, [A9_MUX0], 0>, | |
1444 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1445 | // Extra latency cycles since wbck is 6 cycles | |
1446 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1447 | InstrStage<1, [A9_NPipe]>], | |
1448 | [6, 3, 2, 1]>, | |
1449 | // | |
1450 | // Quad-register Absolute Difference and Accumulate | |
1451 | InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1452 | InstrStage<1, [A9_MUX0], 0>, | |
1453 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1454 | // Extra latency cycles since wbck is 6 cycles | |
1455 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1456 | InstrStage<2, [A9_NPipe]>], | |
1457 | [6, 3, 2, 1]>, | |
1458 | // | |
1459 | // Double-register Integer Pair Add Long | |
1460 | InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1461 | InstrStage<1, [A9_MUX0], 0>, | |
1462 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1463 | // Extra latency cycles since wbck is 6 cycles | |
1464 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1465 | InstrStage<1, [A9_NPipe]>], | |
1466 | [6, 3, 1]>, | |
1467 | // | |
1468 | // Quad-register Integer Pair Add Long | |
1469 | InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1470 | InstrStage<1, [A9_MUX0], 0>, | |
1471 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1472 | // Extra latency cycles since wbck is 6 cycles | |
1473 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1474 | InstrStage<2, [A9_NPipe]>], | |
1475 | [6, 3, 1]>, | |
1476 | ||
1477 | // | |
1478 | // Double-register Integer Multiply (.8, .16) | |
1479 | InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1480 | InstrStage<1, [A9_MUX0], 0>, | |
1481 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1482 | // Extra latency cycles since wbck is 6 cycles | |
1483 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1484 | InstrStage<1, [A9_NPipe]>], | |
1485 | [6, 2, 2]>, | |
1486 | // | |
1487 | // Quad-register Integer Multiply (.8, .16) | |
1488 | InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1489 | InstrStage<1, [A9_MUX0], 0>, | |
1490 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1491 | // Extra latency cycles since wbck is 7 cycles | |
1492 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1493 | InstrStage<2, [A9_NPipe]>], | |
1494 | [7, 2, 2]>, | |
1495 | ||
1496 | // | |
1497 | // Double-register Integer Multiply (.32) | |
1498 | InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1499 | InstrStage<1, [A9_MUX0], 0>, | |
1500 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1501 | // Extra latency cycles since wbck is 7 cycles | |
1502 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1503 | InstrStage<2, [A9_NPipe]>], | |
1504 | [7, 2, 1]>, | |
1505 | // | |
1506 | // Quad-register Integer Multiply (.32) | |
1507 | InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1508 | InstrStage<1, [A9_MUX0], 0>, | |
1509 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1510 | // Extra latency cycles since wbck is 9 cycles | |
1511 | InstrStage<10, [A9_DRegsVFP], 0, Reserved>, | |
1512 | InstrStage<4, [A9_NPipe]>], | |
1513 | [9, 2, 1]>, | |
1514 | // | |
1515 | // Double-register Integer Multiply-Accumulate (.8, .16) | |
1516 | InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1517 | InstrStage<1, [A9_MUX0], 0>, | |
1518 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1519 | // Extra latency cycles since wbck is 6 cycles | |
1520 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1521 | InstrStage<1, [A9_NPipe]>], | |
1522 | [6, 3, 2, 2]>, | |
1523 | // | |
1524 | // Double-register Integer Multiply-Accumulate (.32) | |
1525 | InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1526 | InstrStage<1, [A9_MUX0], 0>, | |
1527 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1528 | // Extra latency cycles since wbck is 7 cycles | |
1529 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1530 | InstrStage<2, [A9_NPipe]>], | |
1531 | [7, 3, 2, 1]>, | |
1532 | // | |
1533 | // Quad-register Integer Multiply-Accumulate (.8, .16) | |
1534 | InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1535 | InstrStage<1, [A9_MUX0], 0>, | |
1536 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1537 | // Extra latency cycles since wbck is 7 cycles | |
1538 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1539 | InstrStage<2, [A9_NPipe]>], | |
1540 | [7, 3, 2, 2]>, | |
1541 | // | |
1542 | // Quad-register Integer Multiply-Accumulate (.32) | |
1543 | InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1544 | InstrStage<1, [A9_MUX0], 0>, | |
1545 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1546 | // Extra latency cycles since wbck is 9 cycles | |
1547 | InstrStage<10, [A9_DRegsVFP], 0, Reserved>, | |
1548 | InstrStage<4, [A9_NPipe]>], | |
1549 | [9, 3, 2, 1]>, | |
1550 | ||
1551 | // | |
1552 | // Move | |
1553 | InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1554 | InstrStage<1, [A9_MUX0], 0>, | |
1555 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1556 | InstrStage<1, [A9_DRegsVFP], 0, Reserved>, | |
1557 | InstrStage<1, [A9_NPipe]>], | |
1558 | [1,1]>, | |
1559 | // | |
1560 | // Move Immediate | |
1561 | InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1562 | InstrStage<1, [A9_MUX0], 0>, | |
1563 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1564 | // Extra latency cycles since wbck is 6 cycles | |
1565 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1566 | InstrStage<1, [A9_NPipe]>], | |
1567 | [3]>, | |
1568 | // | |
1569 | // Double-register Permute Move | |
1570 | InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1571 | InstrStage<1, [A9_MUX0], 0>, | |
1572 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1573 | // Extra latency cycles since wbck is 6 cycles | |
1574 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1575 | InstrStage<1, [A9_NPipe]>], | |
1576 | [2, 1]>, | |
1577 | // | |
1578 | // Quad-register Permute Move | |
1579 | InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1580 | InstrStage<1, [A9_MUX0], 0>, | |
1581 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1582 | // Extra latency cycles since wbck is 6 cycles | |
1583 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1584 | InstrStage<1, [A9_NPipe]>], | |
1585 | [2, 1]>, | |
1586 | // | |
1587 | // Integer to Single-precision Move | |
1588 | InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1589 | InstrStage<1, [A9_MUX0], 0>, | |
1590 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1591 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1592 | InstrStage<1, [A9_NPipe]>], | |
1593 | [1, 1]>, | |
1594 | // | |
1595 | // Integer to Double-precision Move | |
1596 | InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1597 | InstrStage<1, [A9_MUX0], 0>, | |
1598 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1599 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1600 | InstrStage<1, [A9_NPipe]>], | |
1601 | [1, 1, 1]>, | |
1602 | // | |
1603 | // Single-precision to Integer Move | |
1604 | InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1605 | InstrStage<1, [A9_MUX0], 0>, | |
1606 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1607 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1608 | InstrStage<1, [A9_NPipe]>], | |
1609 | [2, 1]>, | |
1610 | // | |
1611 | // Double-precision to Integer Move | |
1612 | InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1613 | InstrStage<1, [A9_MUX0], 0>, | |
1614 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1615 | InstrStage<3, [A9_DRegsVFP], 0, Reserved>, | |
1616 | InstrStage<1, [A9_NPipe]>], | |
1617 | [2, 2, 1]>, | |
1618 | // | |
1619 | // Integer to Lane Move | |
1620 | InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1621 | InstrStage<1, [A9_MUX0], 0>, | |
1622 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1623 | InstrStage<4, [A9_DRegsVFP], 0, Reserved>, | |
1624 | InstrStage<2, [A9_NPipe]>], | |
1625 | [3, 1, 1]>, | |
1626 | ||
1627 | // | |
1628 | // Vector narrow move | |
1629 | InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1630 | InstrStage<1, [A9_MUX0], 0>, | |
1631 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1632 | // Extra latency cycles since wbck is 6 cycles | |
1633 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1634 | InstrStage<1, [A9_NPipe]>], | |
1635 | [3, 1]>, | |
1636 | // | |
1637 | // Double-register FP Unary | |
1638 | InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1639 | InstrStage<1, [A9_MUX0], 0>, | |
1640 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1641 | // Extra latency cycles since wbck is 6 cycles | |
1642 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1643 | InstrStage<1, [A9_NPipe]>], | |
1644 | [5, 2]>, | |
1645 | // | |
1646 | // Quad-register FP Unary | |
1647 | // Result written in N5, but that is relative to the last cycle of multicycle, | |
1648 | // so we use 6 for those cases | |
1649 | InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1650 | InstrStage<1, [A9_MUX0], 0>, | |
1651 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1652 | // Extra latency cycles since wbck is 7 cycles | |
1653 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1654 | InstrStage<2, [A9_NPipe]>], | |
1655 | [6, 2]>, | |
1656 | // | |
1657 | // Double-register FP Binary | |
1658 | // FIXME: We're using this itin for many instructions and [2, 2] here is too | |
1659 | // optimistic. | |
1660 | InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1661 | InstrStage<1, [A9_MUX0], 0>, | |
1662 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1663 | // Extra latency cycles since wbck is 6 cycles | |
1664 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1665 | InstrStage<1, [A9_NPipe]>], | |
1666 | [5, 2, 2]>, | |
1667 | ||
1668 | // | |
1669 | // VPADD, etc. | |
1670 | InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1671 | InstrStage<1, [A9_MUX0], 0>, | |
1672 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1673 | // Extra latency cycles since wbck is 6 cycles | |
1674 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1675 | InstrStage<1, [A9_NPipe]>], | |
1676 | [5, 1, 1]>, | |
1677 | // | |
1678 | // Double-register FP VMUL | |
1679 | InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1680 | InstrStage<1, [A9_MUX0], 0>, | |
1681 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1682 | // Extra latency cycles since wbck is 6 cycles | |
1683 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1684 | InstrStage<1, [A9_NPipe]>], | |
1685 | [5, 2, 1]>, | |
1686 | // | |
1687 | // Quad-register FP Binary | |
1688 | // Result written in N5, but that is relative to the last cycle of multicycle, | |
1689 | // so we use 6 for those cases | |
1690 | // FIXME: We're using this itin for many instructions and [2, 2] here is too | |
1691 | // optimistic. | |
1692 | InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1693 | InstrStage<1, [A9_MUX0], 0>, | |
1694 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1695 | // Extra latency cycles since wbck is 7 cycles | |
1696 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1697 | InstrStage<2, [A9_NPipe]>], | |
1698 | [6, 2, 2]>, | |
1699 | // | |
1700 | // Quad-register FP VMUL | |
1701 | InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1702 | InstrStage<1, [A9_MUX0], 0>, | |
1703 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1704 | // Extra latency cycles since wbck is 7 cycles | |
1705 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1706 | InstrStage<1, [A9_NPipe]>], | |
1707 | [6, 2, 1]>, | |
1708 | // | |
1709 | // Double-register FP Multiply-Accumulate | |
1710 | InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1711 | InstrStage<1, [A9_MUX0], 0>, | |
1712 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1713 | // Extra latency cycles since wbck is 7 cycles | |
1714 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1715 | InstrStage<2, [A9_NPipe]>], | |
1716 | [6, 3, 2, 1]>, | |
1717 | // | |
1718 | // Quad-register FP Multiply-Accumulate | |
1719 | // Result written in N9, but that is relative to the last cycle of multicycle, | |
1720 | // so we use 10 for those cases | |
1721 | InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1722 | InstrStage<1, [A9_MUX0], 0>, | |
1723 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1724 | // Extra latency cycles since wbck is 9 cycles | |
1725 | InstrStage<10, [A9_DRegsVFP], 0, Reserved>, | |
1726 | InstrStage<4, [A9_NPipe]>], | |
1727 | [8, 4, 2, 1]>, | |
1728 | // | |
1729 | // Double-register Fused FP Multiply-Accumulate | |
1730 | InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1731 | InstrStage<1, [A9_MUX0], 0>, | |
1732 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1733 | // Extra latency cycles since wbck is 7 cycles | |
1734 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1735 | InstrStage<2, [A9_NPipe]>], | |
1736 | [6, 3, 2, 1]>, | |
1737 | // | |
1738 | // Quad-register Fused FP Multiply-Accumulate | |
1739 | // Result written in N9, but that is relative to the last cycle of multicycle, | |
1740 | // so we use 10 for those cases | |
1741 | InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1742 | InstrStage<1, [A9_MUX0], 0>, | |
1743 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1744 | // Extra latency cycles since wbck is 9 cycles | |
1745 | InstrStage<10, [A9_DRegsVFP], 0, Reserved>, | |
1746 | InstrStage<4, [A9_NPipe]>], | |
1747 | [8, 4, 2, 1]>, | |
1748 | // | |
1749 | // Double-register Reciprocal Step | |
1750 | InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1751 | InstrStage<1, [A9_MUX0], 0>, | |
1752 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1753 | // Extra latency cycles since wbck is 10 cycles | |
1754 | InstrStage<11, [A9_DRegsVFP], 0, Reserved>, | |
1755 | InstrStage<1, [A9_NPipe]>], | |
1756 | [9, 2, 2]>, | |
1757 | // | |
1758 | // Quad-register Reciprocal Step | |
1759 | InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1760 | InstrStage<1, [A9_MUX0], 0>, | |
1761 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1762 | // Extra latency cycles since wbck is 11 cycles | |
1763 | InstrStage<12, [A9_DRegsVFP], 0, Reserved>, | |
1764 | InstrStage<2, [A9_NPipe]>], | |
1765 | [10, 2, 2]>, | |
1766 | // | |
1767 | // Double-register Permute | |
1768 | InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1769 | InstrStage<1, [A9_MUX0], 0>, | |
1770 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1771 | // Extra latency cycles since wbck is 6 cycles | |
1772 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1773 | InstrStage<1, [A9_NPipe]>], | |
1774 | [2, 2, 1, 1]>, | |
1775 | // | |
1776 | // Quad-register Permute | |
1777 | // Result written in N2, but that is relative to the last cycle of multicycle, | |
1778 | // so we use 3 for those cases | |
1779 | InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1780 | InstrStage<1, [A9_MUX0], 0>, | |
1781 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1782 | // Extra latency cycles since wbck is 7 cycles | |
1783 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1784 | InstrStage<2, [A9_NPipe]>], | |
1785 | [3, 3, 1, 1]>, | |
1786 | // | |
1787 | // Quad-register Permute (3 cycle issue) | |
1788 | // Result written in N2, but that is relative to the last cycle of multicycle, | |
1789 | // so we use 4 for those cases | |
1790 | InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1791 | InstrStage<1, [A9_MUX0], 0>, | |
1792 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1793 | // Extra latency cycles since wbck is 8 cycles | |
1794 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1795 | InstrStage<3, [A9_NPipe]>], | |
1796 | [4, 4, 1, 1]>, | |
1797 | ||
1798 | // | |
1799 | // Double-register VEXT | |
1800 | InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1801 | InstrStage<1, [A9_MUX0], 0>, | |
1802 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1803 | // Extra latency cycles since wbck is 6 cycles | |
1804 | InstrStage<7, [A9_DRegsVFP], 0, Reserved>, | |
1805 | InstrStage<1, [A9_NPipe]>], | |
1806 | [2, 1, 1]>, | |
1807 | // | |
1808 | // Quad-register VEXT | |
1809 | InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1810 | InstrStage<1, [A9_MUX0], 0>, | |
1811 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1812 | // Extra latency cycles since wbck is 7 cycles | |
1813 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1814 | InstrStage<2, [A9_NPipe]>], | |
1815 | [3, 1, 2]>, | |
1816 | // | |
1817 | // VTB | |
1818 | InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1819 | InstrStage<1, [A9_MUX0], 0>, | |
1820 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1821 | // Extra latency cycles since wbck is 7 cycles | |
1822 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1823 | InstrStage<2, [A9_NPipe]>], | |
1824 | [3, 2, 1]>, | |
1825 | InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1826 | InstrStage<1, [A9_MUX0], 0>, | |
1827 | InstrStage<2, [A9_DRegsN], 0, Required>, | |
1828 | // Extra latency cycles since wbck is 7 cycles | |
1829 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1830 | InstrStage<2, [A9_NPipe]>], | |
1831 | [3, 2, 2, 1]>, | |
1832 | InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1833 | InstrStage<1, [A9_MUX0], 0>, | |
1834 | InstrStage<2, [A9_DRegsN], 0, Required>, | |
1835 | // Extra latency cycles since wbck is 8 cycles | |
1836 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1837 | InstrStage<3, [A9_NPipe]>], | |
1838 | [4, 2, 2, 3, 1]>, | |
1839 | InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1840 | InstrStage<1, [A9_MUX0], 0>, | |
1841 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1842 | // Extra latency cycles since wbck is 8 cycles | |
1843 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1844 | InstrStage<3, [A9_NPipe]>], | |
1845 | [4, 2, 2, 3, 3, 1]>, | |
1846 | // | |
1847 | // VTBX | |
1848 | InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1849 | InstrStage<1, [A9_MUX0], 0>, | |
1850 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1851 | // Extra latency cycles since wbck is 7 cycles | |
1852 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1853 | InstrStage<2, [A9_NPipe]>], | |
1854 | [3, 1, 2, 1]>, | |
1855 | InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1856 | InstrStage<1, [A9_MUX0], 0>, | |
1857 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1858 | // Extra latency cycles since wbck is 7 cycles | |
1859 | InstrStage<8, [A9_DRegsVFP], 0, Reserved>, | |
1860 | InstrStage<2, [A9_NPipe]>], | |
1861 | [3, 1, 2, 2, 1]>, | |
1862 | InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1863 | InstrStage<1, [A9_MUX0], 0>, | |
1864 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1865 | // Extra latency cycles since wbck is 8 cycles | |
1866 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1867 | InstrStage<3, [A9_NPipe]>], | |
1868 | [4, 1, 2, 2, 3, 1]>, | |
1869 | InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, | |
1870 | InstrStage<1, [A9_MUX0], 0>, | |
1871 | InstrStage<1, [A9_DRegsN], 0, Required>, | |
1872 | // Extra latency cycles since wbck is 8 cycles | |
1873 | InstrStage<9, [A9_DRegsVFP], 0, Reserved>, | |
1874 | InstrStage<2, [A9_NPipe]>], | |
1875 | [4, 1, 2, 2, 3, 3, 1]> | |
1876 | ]>; | |
1877 | ||
1878 | // ===---------------------------------------------------------------------===// | |
1879 | // The following definitions describe the simpler per-operand machine model. | |
1880 | // This works with MachineScheduler and will eventually replace itineraries. | |
1881 | ||
1a4d82fc JJ |
1882 | class A9WriteLMOpsListType<list<WriteSequence> writes> { |
1883 | list <WriteSequence> Writes = writes; | |
1884 | SchedMachineModel SchedModel = ?; | |
1885 | } | |
223e47cc LB |
1886 | |
1887 | // Cortex-A9 machine model for scheduling and other instruction cost heuristics. | |
1888 | def CortexA9Model : SchedMachineModel { | |
1889 | let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. | |
1a4d82fc | 1890 | let MicroOpBufferSize = 56; // Based on available renamed registers. |
223e47cc LB |
1891 | let LoadLatency = 2; // Optimistic load latency assuming bypass. |
1892 | // This is overridden by OperandCycles if the | |
1893 | // Itineraries are queried instead. | |
1894 | let MispredictPenalty = 8; // Based on estimate of pipeline depth. | |
1895 | | |
1896 | let Itineraries = CortexA9Itineraries; | |
1a4d82fc JJ |
1897 | |
1898 | // FIXME: Many vector operations were never given an itinerary. We | |
1899 | // haven't mapped these to the new model either, so the model is declared incomplete. | |
1900 | let CompleteModel = 0; | |
223e47cc LB |
1901 | } | |
1902 | ||
1903 | //===----------------------------------------------------------------------===// | |
1904 | // Define each kind of processor resource and number available. | |
1a4d82fc JJ |
1905 | // |
1906 | // The AGU unit has BufferSize=1 so that the latency between operations | |
1907 | // that use it is considered to stall other operations. | |
1908 | // | |
1909 | // The FP unit has BufferSize=0 so that it is a hard dispatch | |
1910 | // hazard. No instruction may be dispatched while the unit is reserved. | |
1911 | ||
1912 | let SchedModel = CortexA9Model in { | |
223e47cc LB |
1913 | |
1914 | def A9UnitALU : ProcResource<2>; | |
1915 | def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } | |
1a4d82fc | 1916 | def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } |
223e47cc | 1917 | def A9UnitLS : ProcResource<1>; |
1a4d82fc | 1918 | def A9UnitFP : ProcResource<1> { let BufferSize = 0; } |
223e47cc LB |
1919 | def A9UnitB : ProcResource<1>; |
1920 | ||
1921 | //===----------------------------------------------------------------------===// | |
1922 | // Define scheduler read/write types with their resources and latency on A9. | |
1923 | ||
1924 | // Consume an issue slot, but no processor resources. This is useful when all | |
1925 | // other writes associated with the operand have NumMicroOps = 0. | |
1926 | def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; } | |
1927 | ||
1928 | // Write an integer register. | |
1929 | def A9WriteI : SchedWriteRes<[A9UnitALU]>; | |
1930 | // Write an integer shifted-by register | |
1931 | def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } | |
1932 | ||
1933 | // Basic ALU. | |
1a4d82fc | 1934 | def A9WriteALU : SchedWriteRes<[A9UnitALU]>; |
223e47cc | 1935 | // ALU with operand shifted by immediate. |
1a4d82fc | 1936 | def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; } |
223e47cc | 1937 | // ALU with operand shifted by register. |
1a4d82fc | 1938 | def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } |
223e47cc LB |
1939 | |
1940 | // Multiplication | |
1941 | def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } | |
1942 | def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5; | |
1943 | let NumMicroOps = 0; } | |
1944 | def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } | |
1945 | def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; | |
1946 | let NumMicroOps = 0; } | |
1947 | ||
1948 | // Floating-point | |
1949 | // Only one FP or AGU instruction may issue per cycle. We model this | |
1950 | // by having FP instructions consume the AGU resource. | |
1951 | def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } | |
1952 | def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } | |
1953 | def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } | |
1954 | def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } | |
1955 | def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } | |
1956 | def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } | |
1957 | def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } | |
1958 | def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } | |
1959 | def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; } | |
1960 | def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; } | |
1961 | ||
1962 | // NEON has an odd mix of latencies. Simply name the write types by latency. | |
1963 | def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } | |
1964 | def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; } | |
1965 | def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; } | |
1966 | def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } | |
1967 | def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } | |
1968 | def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } | |
1969 | def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } | |
1970 | def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } | |
1971 | def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } | |
1972 | ||
1973 | // Reserve A9UnitFP for 2 consecutive cycles and A9UnitAGU for 1. ResourceCycles is positional: one entry per resource listed in SchedWriteRes<[...]>. | |
1974 | def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { | |
1975 | let Latency = 4; | |
1976 | let ResourceCycles = [2, 1]; | |
1977 | } | |
1978 | def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { | |
1979 | let Latency = 7; | |
1980 | let ResourceCycles = [2, 1]; | |
1981 | } | |
1982 | def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { | |
1983 | let Latency = 9; | |
1984 | let ResourceCycles = [2, 1]; | |
1985 | } | |
1986 | ||
1987 | // Branches don't have a def operand but still consume resources. | |
1988 | def A9WriteB : SchedWriteRes<[A9UnitB]>; | |
1989 | ||
1990 | // Address generation. | |
1991 | def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } | |
1992 | ||
1993 | // Load Integer. | |
1994 | def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } | |
1995 | // Load the upper 32-bits using the same micro-op. | |
1996 | def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; | |
1997 | let NumMicroOps = 0; } | |
1998 | // Offset shifted by register. | |
1999 | def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } | |
2000 | // Load (and zero extend) a byte. | |
2001 | def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } | |
2002 | def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; } | |
2003 | ||
2004 | // Load or Store Float, aligned. | |
2005 | def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; } | |
2006 | ||
2007 | // Store Integer. | |
2008 | def A9WriteS : SchedWriteRes<[A9UnitLS]>; | |
2009 | ||
2010 | //===----------------------------------------------------------------------===// | |
2011 | // Define resources dynamically for load multiple variants. | |
2012 | ||
2013 | // Define helpers for extra latency without consuming resources. | |
2014 | def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } | |
2015 | foreach NumCycles = 2-8 in { | |
2016 | def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; | |
2017 | } // foreach NumCycles | |
2018 | ||
223e47cc LB |
2019 | // Define address generation sequences and predicates for 8 flavors of LDMs. |
2020 | foreach NumAddr = 1-8 in { | |
2021 | ||
2022 | // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive | |
2023 | // latency for instructions that generate multiple loads or stores. | |
2024 | def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; | |
2025 | ||
2026 | // Define a predicate to select the LDM based on number of memory addresses. | |
2027 | def A9LMAdr#NumAddr#Pred : | |
1a4d82fc | 2028 | SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; |
223e47cc LB |
2029 | |
2030 | } // foreach NumAddr | |
2031 | ||
2032 | // Fall-back for unknown LDMs. | |
2033 | def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">; | |
2034 | ||
2035 | // LDM/VLDM/VLDn address generation latency & resources. | |
2036 | // Dynamically select the A9WriteAdrN sequence using a predicate. | |
2037 | def A9WriteLMAdr : SchedWriteVariant<[ | |
2038 | SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>, | |
2039 | SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>, | |
2040 | SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>, | |
2041 | SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>, | |
2042 | SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>, | |
2043 | SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>, | |
2044 | SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>, | |
2045 | SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>, | |
2046 | // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers. | |
2047 | SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>; | |
2048 | ||
2049 | // Define LDM Resources. | |
2050 | // These take no issue resource, so they can be combined with other | |
2051 | // writes like WriteB. | |
2052 | // A9WriteLMLo takes a single LS resource and 2 cycles. | |
2053 | def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2; | |
2054 | let NumMicroOps = 0; } | |
2055 | // Assuming aligned access, the upper half of each pair is free with | |
2056 | // the same latency. | |
2057 | def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2; | |
2058 | let NumMicroOps = 0; } | |
2059 | // Each A9WriteL#N variant adds N cycles of latency without consuming | |
2060 | // additional resources. | |
2061 | foreach NumAddr = 1-8 in { | |
2062 | def A9WriteL#NumAddr : WriteSequence< | |
2063 | [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; | |
2064 | def A9WriteL#NumAddr#Hi : WriteSequence< | |
2065 | [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; | |
2066 | } | |
2067 | ||
2068 | //===----------------------------------------------------------------------===// | |
2069 | // LDM: Load multiple into 32-bit integer registers. | |
2070 | ||
1a4d82fc JJ |
2071 | def A9WriteLMOpsList : A9WriteLMOpsListType< |
2072 | [A9WriteL1, A9WriteL1Hi, | |
2073 | A9WriteL2, A9WriteL2Hi, | |
2074 | A9WriteL3, A9WriteL3Hi, | |
2075 | A9WriteL4, A9WriteL4Hi, | |
2076 | A9WriteL5, A9WriteL5Hi, | |
2077 | A9WriteL6, A9WriteL6Hi, | |
2078 | A9WriteL7, A9WriteL7Hi, | |
2079 | A9WriteL8, A9WriteL8Hi]>; | |
2080 | ||
223e47cc LB |
2081 | // A9WriteLM variants expand into a pair of writes for each 64-bit |
2082 | // value loaded. When the number of registers is odd, the last | |
2083 | // A9WriteLnHi is naturally ignored because the instruction has no | |
2084 | // following def operands. These variants take no issue resource, so | |
2085 | // they may need to be part of a WriteSequence that includes A9WriteIssue. | |
2086 | def A9WriteLM : SchedWriteVariant<[ | |
1a4d82fc JJ |
2087 | SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>, |
2088 | SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>, | |
2089 | SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>, | |
2090 | SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>, | |
2091 | SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>, | |
2092 | SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>, | |
2093 | SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>, | |
2094 | SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>, | |
223e47cc LB |
2095 | // For unknown LDMs, define the maximum number of writes, but only make | |
2096 | // the first two pairs (A9WriteL1 and A9WriteL2) consume an LS resource. | |
2097 | SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi, | |
2098 | A9WriteL2, A9WriteL2Hi, | |
2099 | A9WriteL3Hi, A9WriteL3Hi, | |
2100 | A9WriteL4Hi, A9WriteL4Hi, | |
2101 | A9WriteL5Hi, A9WriteL5Hi, | |
2102 | A9WriteL6Hi, A9WriteL6Hi, | |
2103 | A9WriteL7Hi, A9WriteL7Hi, | |
2104 | A9WriteL8Hi, A9WriteL8Hi]>]> { | |
2105 | let Variadic = 1; | |
2106 | } | |
2107 | ||
2108 | //===----------------------------------------------------------------------===// | |
2109 | // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support. | |
2110 | ||
2111 | // A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources | |
2112 | // so it can be used in WriteSequences for single-issue instructions that | |
2113 | // encapsulate multiple loads. | |
2114 | def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { | |
2115 | let Latency = 1; | |
2116 | let NumMicroOps = 0; | |
2117 | } | |
2118 | ||
2119 | foreach NumAddr = 1-8 in { | |
2120 | ||
2121 | // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops. | |
2122 | def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>; | |
2123 | ||
2124 | // A9WriteLfp1-8 definitions are statically expanded into a sequence of | |
2125 | // A9WriteLfpOps with additive latency that takes a single issue slot. | |
2126 | // Used directly to describe NEON VLDn. | |
2127 | def A9WriteLfp#NumAddr : WriteSequence< | |
2128 | [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>; | |
2129 | ||
2130 | // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for | |
2131 | // permuting loaded values. | |
2132 | def A9WriteLfp#NumAddr#Mov : WriteSequence< | |
2133 | [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>; | |
2134 | ||
2135 | } // foreach NumAddr | |
2136 | ||
2137 | // Define VLDM/VSTM PreRA resources. | |
2138 | // A9WriteLMfpPreRA is dynamically expanded into the correct | |
2139 | // A9WriteLfp1-8 sequence based on a predicate. This supports the | |
2140 | // preRA VLDM variants in which all 64-bit loads are written to the | |
2141 | // same tuple of either single or double precision registers. | |
2142 | def A9WriteLMfpPreRA : SchedWriteVariant<[ | |
2143 | SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>, | |
2144 | SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>, | |
2145 | SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>, | |
2146 | SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>, | |
2147 | SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>, | |
2148 | SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>, | |
2149 | SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>, | |
2150 | SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>, | |
2151 | // For unknown VLDM/VSTM PreRA, assume 2xS registers. | |
2152 | SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>; | |
2153 | ||
2154 | // Define VLDM/VSTM PostRA Resources. | |
2155 | // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency. | |
2156 | def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; } | |
2157 | ||
2158 | foreach NumAddr = 1-8 in { | |
2159 | ||
2160 | // Each A9WriteLMfp#N variant adds N cycles of latency without consuming | |
2161 | // additional resources. | |
2162 | def A9WriteLMfp#NumAddr : WriteSequence< | |
2163 | [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; | |
2164 | ||
2165 | // Assuming aligned access, the upper half of each pair is free with | |
2166 | // the same latency. | |
2167 | def A9WriteLMfp#NumAddr#Hi : WriteSequence< | |
2168 | [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; | |
2169 | ||
2170 | } // foreach NumAddr | |
2171 | ||
2172 | // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a | |
2173 | // pair of writes for each 64-bit data loaded. When the number of | |
2174 | // registers is odd, the last WriteLMfpnHi is naturally ignored because | |
2175 | // the instruction has no following def operands. | |
1a4d82fc JJ |
2176 | |
2177 | def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType< | |
2178 | [A9WriteLMfp1, A9WriteLMfp2, // 0-1 | |
2179 | A9WriteLMfp3, A9WriteLMfp4, // 2-3 | |
2180 | A9WriteLMfp5, A9WriteLMfp6, // 4-5 | |
2181 | A9WriteLMfp7, A9WriteLMfp8, // 6-7 | |
2182 | A9WriteLMfp1Hi, // 8-8 | |
2183 | A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10 | |
2184 | A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12 | |
2185 | A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14 | |
2186 | A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16 | |
2187 | A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18 | |
2188 | A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20 | |
2189 | A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22 | |
2190 | ||
223e47cc | 2191 | def A9WriteLMfpPostRA : SchedWriteVariant<[ |
1a4d82fc JJ |
2192 | SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>, |
2193 | SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>, | |
2194 | SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>, | |
2195 | SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>, | |
2196 | SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>, | |
2197 | SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>, | |
2198 | SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>, | |
2199 | SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>, | |
223e47cc | 2200 | // For unknown LDMs, define the maximum number of writes, but only |
1a4d82fc JJ |
2201 | // make the first two consume resources. We are optimizing for the case |
2202 | // where the operands are DPRs, and this determines the first eight | |
2203 | // types. The remaining eight types are filled to cover the case | |
2204 | // where the operands are SPRs. | |
2205 | SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2, | |
2206 | A9WriteLMfp3Hi, A9WriteLMfp4Hi, | |
2207 | A9WriteLMfp5Hi, A9WriteLMfp6Hi, | |
2208 | A9WriteLMfp7Hi, A9WriteLMfp8Hi, | |
223e47cc LB |
2209 | A9WriteLMfp5Hi, A9WriteLMfp5Hi, |
2210 | A9WriteLMfp6Hi, A9WriteLMfp6Hi, | |
2211 | A9WriteLMfp7Hi, A9WriteLMfp7Hi, | |
2212 | A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> { | |
2213 | let Variadic = 1; | |
2214 | } | |
2215 | ||
2216 | // Distinguish between our multiple MI-level forms of the same | |
2217 | // VLDM/VSTM instructions. | |
2218 | def A9PreRA : SchedPredicate< | |
2219 | "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">; | |
2220 | def A9PostRA : SchedPredicate< | |
2221 | "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">; | |
2222 | ||
2223 | // VLDM represents all destination registers as a single register | |
2224 | // tuple, unlike LDM. So the number of write operands is not variadic. | |
2225 | def A9WriteLMfp : SchedWriteVariant<[ | |
2226 | SchedVar<A9PreRA, [A9WriteLMfpPreRA]>, | |
2227 | SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>; | |
2228 | ||
2229 | //===----------------------------------------------------------------------===// | |
1a4d82fc | 2230 | // Resources for other (non-LDM/VLDM) Variants. |
223e47cc LB |
2231 | |
2232 | // These mov immediate writers are unconditionally expanded with | |
2233 | // additive latency. | |
2234 | def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>; | |
1a4d82fc | 2235 | def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>; |
223e47cc LB |
2236 | def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>; |
2237 | ||
2238 | // Some ALU operations can read loaded integer values one cycle early. | |
1a4d82fc | 2239 | def A9ReadALU : SchedReadAdvance<1, |
223e47cc LB |
2240 | [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi, |
2241 | A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4, | |
2242 | A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8, | |
2243 | A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi, | |
2244 | A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>; | |
2245 | ||
2246 | // Read types for operands that are unconditionally read in cycle N | |
2247 | // after the instruction issues, decreasing producer latency by N-1. | |
2248 | def A9Read2 : SchedReadAdvance<1>; | |
2249 | def A9Read3 : SchedReadAdvance<2>; | |
2250 | def A9Read4 : SchedReadAdvance<3>; | |
2251 | ||
2252 | //===----------------------------------------------------------------------===// | |
2253 | // Map itinerary classes to scheduler read/write resources per operand. | |
2254 | // | |
2255 | // For ARM, we piggyback scheduler resources on the Itinerary classes | |
2256 | // to avoid perturbing the existing instruction definitions. | |
2257 | ||
2258 | // This table follows the ARM Cortex-A9 Technical Reference Manuals, | |
2259 | // mostly in order. | |
223e47cc | 2260 | |
1a4d82fc | 2261 | def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, |
223e47cc LB |
2262 | IIC_iMVNi,IIC_iMVNsi, |
2263 | IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; | |
1a4d82fc | 2264 | def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>; |
223e47cc LB |
2265 | def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; |
2266 | ||
2267 | def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; | |
2268 | def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; | |
2269 | def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; | |
2270 | ||
1a4d82fc JJ |
2271 | def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; |
2272 | def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; | |
2273 | def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>; | |
2274 | def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; | |
2275 | def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>; | |
2276 | def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB | |
2277 | def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; | |
2278 | def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; | |
223e47cc LB |
2279 | |
2280 | // A9WriteMHi is ignored for MUL32. | |
2281 | def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, | |
2282 | IIC_iMUL64,IIC_iMAC64]>; | |
2283 | // FIXME: SMLALxx needs itin classes | |
2284 | def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>; | |
2285 | ||
2286 | // TODO: For floating-point ops, we model the pipeline forwarding | |
2287 | // latencies here. WAW latencies are sometimes longer. | |
2288 | ||
2289 | def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI, | |
2290 | IIC_fpUNA32, IIC_fpUNA64, | |
2291 | IIC_fpCMP32, IIC_fpCMP64]>; | |
2292 | def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>; | |
2293 | def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS, | |
2294 | IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI, | |
2295 | IIC_fpALU32, IIC_fpALU64]>; | |
2296 | def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>; | |
2297 | def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>; | |
2298 | def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>; | |
2299 | def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>; | |
2300 | def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>; | |
2301 | def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>; | |
2302 | def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>; | |
2303 | def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>; | |
2304 | ||
2305 | def :ItinRW<[A9WriteB], [IIC_Br]>; | |
2306 | ||
2307 | // A9 PLD is processed in a dedicated unit. | |
2308 | def :ItinRW<[], [IIC_Preload]>; | |
2309 | ||
2310 | // Note: We must assume that loads are aligned, since the machine | |
2311 | // model cannot know this statically and A9 ignores alignment hints. | |
2312 | ||
2313 | // A9WriteAdr consumes AGU regardless of address writeback. But its | |
2314 | // latency is only relevant for users of an updated address. | |
2315 | def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r, | |
2316 | IIC_iLoad_iu,IIC_iLoad_ru]>; | |
2317 | def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>; | |
2318 | def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r, | |
2319 | IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>; | |
2320 | def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>; | |
2321 | def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r, | |
2322 | IIC_iLoad_d_ru]>; | |
2323 | // Store either has no def operands, or the one def for address writeback. | |
2324 | def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r, | |
2325 | IIC_iStore_iu, IIC_iStore_ru, | |
2326 | IIC_iStore_d_i, IIC_iStore_d_r, | |
2327 | IIC_iStore_d_ru]>; | |
2328 | def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu, | |
2329 | IIC_iStore_bh_i, IIC_iStore_bh_r, | |
2330 | IIC_iStore_bh_iu, IIC_iStore_bh_ru]>; | |
2331 | def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>; | |
2332 | ||
2333 | // A9WriteLM will be expanded into a separate write for each def | |
2334 | // operand. Address generation consumes resources, but A9WriteLMAdr | |
2335 | // is listed after all def operands, so has no effective latency. | |
2336 | // | |
2337 | // Note: A9WriteLM expands into an even number of def operands. The | |
2338 | // actual number of def operands may be less by one. | |
2339 | def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>; | |
2340 | ||
2341 | // Load multiple with address writeback has an extra def operand in | |
2342 | // front of the loaded registers. | |
2343 | // | |
2344 | // Reuse the load-multiple variants for store-multiple because the | |
2345 | // resources are identical. For stores, only the address writeback | |
2346 | // has a def operand so the WriteL latencies are unused. | |
2347 | def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu, | |
2348 | IIC_iStore_m, | |
2349 | IIC_iStore_mu]>; | |
2350 | def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>; | |
1a4d82fc | 2351 | def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>; |
223e47cc LB |
2352 | |
2353 | def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>; | |
2354 | ||
2355 | def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>; | |
2356 | def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>; | |
2357 | def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64, | |
2358 | IIC_fpStore_m, IIC_fpStore_mu]>; | |
2359 | ||
2360 | // Note: Unlike VLDM, VLD1 expects the writeback operand after the | |
2361 | // normal writes. | |
2362 | def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u, | |
2363 | IIC_VLD1x2, IIC_VLD1x2u]>; | |
2364 | def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u, | |
2365 | IIC_VLD1x4, IIC_VLD1x4u, | |
2366 | IIC_VLD4dup, IIC_VLD4dupu]>; | |
2367 | def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu, | |
2368 | IIC_VLD2, IIC_VLD2u, | |
2369 | IIC_VLD2dup, IIC_VLD2dupu]>; | |
2370 | def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu, | |
2371 | IIC_VLD2x2, IIC_VLD2x2u, | |
2372 | IIC_VLD2ln, IIC_VLD2lnu]>; | |
2373 | def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u, | |
2374 | IIC_VLD3dup, IIC_VLD3dupu]>; | |
2375 | def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u, | |
2376 | IIC_VLD4ln, IIC_VLD4lnu]>; | |
2377 | def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>; | |
2378 | ||
2379 | // Vector stores use similar resources to vector loads, so use the | |
2380 | // same write types. The address write must be first for stores with | |
2381 | // address writeback. | |
2382 | def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u, | |
2383 | IIC_VST1x2, IIC_VST1x2u, | |
2384 | IIC_VST1ln, IIC_VST1lnu, | |
2385 | IIC_VST2, IIC_VST2u, | |
2386 | IIC_VST2x2, IIC_VST2x2u, | |
2387 | IIC_VST2ln, IIC_VST2lnu]>; | |
2388 | def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u, | |
2389 | IIC_VST1x4, IIC_VST1x4u, | |
2390 | IIC_VST3, IIC_VST3u, | |
2391 | IIC_VST3ln, IIC_VST3lnu, | |
2392 | IIC_VST4, IIC_VST4u, | |
2393 | IIC_VST4ln, IIC_VST4lnu]>; | |
2394 | ||
2395 | // NEON moves. | |
2396 | def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>; | |
2397 | def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>; | |
2398 | def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>; | |
2399 | ||
2400 | // NEON integer arithmetic | |
2401 | // | |
2402 | // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL | |
2403 | def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>; | |
2404 | // VSUB/VMVN/VCLSD/VCLZD/VCNTD | |
2405 | def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; | |
2406 | // VADDL/VSUBL/VNEG are mapped later under IIC_SHLi. | |
2407 | // ... | |
2408 | // VHADD/VRHADD/VQADD/VTST/VADH/VRADH | |
2409 | def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; | |
1a4d82fc | 2410 | |
223e47cc LB |
2411 | // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL |
2412 | def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; | |
2413 | // VQNEG/VQABS | |
2414 | def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>; | |
2415 | // VABS | |
2416 | def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>; | |
2417 | // VPADD/VPADDL are mapped later under IIC_SHLi. | |
2418 | // ... | |
2419 | // VCLSQ/VCLZQ/VCNTQ, takes two cycles. | |
2420 | def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>; | |
2421 | // VMOVimm/VMVNimm/VORRimm/VBICimm | |
2422 | def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>; | |
2423 | def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>; | |
2424 | def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>; | |
2425 | ||
2426 | // NEON integer multiply | |
2427 | // | |
2428 | // Note: these don't quite match the timing docs, but they do match | |
2429 | // the original A9 itinerary. | |
2430 | def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>; | |
2431 | def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>; | |
2432 | def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>; | |
2433 | def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>; | |
2434 | def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>; | |
2435 | def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>; | |
2436 | def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>; | |
2437 | def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>; | |
2438 | ||
2439 | // NEON integer shift | |
2440 | // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles. | |
2441 | def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>; | |
2442 | def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>; | |
2443 | ||
2444 | // NEON permute | |
1a4d82fc | 2445 | def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; |
223e47cc LB |
2446 | def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2], |
2447 | [IIC_VPERMQ3, IIC_VEXTQ]>; | |
2448 | def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>; | |
2449 | def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>; | |
2450 | def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>; | |
2451 | def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>; | |
2452 | def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>; | |
2453 | def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>; | |
2454 | def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>; | |
2455 | def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3], | |
2456 | [IIC_VTBX4]>; | |
2457 | ||
2458 | // NEON floating-point | |
2459 | def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>; | |
2460 | def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>; | |
2461 | def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>; | |
2462 | def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>; | |
2463 | def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>; | |
2464 | def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>; | |
2465 | def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>; | |
2466 | def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>; | |
1a4d82fc JJ |
2467 | |
2468 | // Map SchedRWs that are identical for cortexa9 to existing resources. | |
2469 | def : SchedAlias<WriteALU, A9WriteALU>; | |
2470 | def : SchedAlias<WriteALUsr, A9WriteALUsr>; | |
2471 | def : SchedAlias<WriteALUSsr, A9WriteALUsr>; | |
2472 | def : SchedAlias<ReadALU, A9ReadALU>; | |
2473 | def : SchedAlias<ReadALUsr, A9ReadALU>; | |
2474 | def : InstRW< [WriteALU], | |
2475 | (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", | |
2476 | "BICrr")>; | |
2477 | def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>; | |
2478 | def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>; | |
2479 | ||
2480 | ||
2481 | def : SchedAlias<WriteCMP, A9WriteALU>; | |
2482 | def : SchedAlias<WriteCMPsi, A9WriteALU>; | |
2483 | def : SchedAlias<WriteCMPsr, A9WriteALU>; | |
2484 | ||
2485 | def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", | |
2486 | "MOVCCsr")>; | |
2487 | def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>; | |
2488 | def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm", | |
2489 | "MOV_ga_dyn")>; | |
2490 | def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>; | |
2491 | def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; | |
2492 | ||
2493 | def : InstRW< [WriteALU], (instregex "SEL")>; | |
2494 | ||
2495 | def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>; | |
2496 | ||
2497 | def : InstRW< [A9WriteM], | |
2498 | (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS", | |
2499 | "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>; | |
2500 | def : InstRW< [A9WriteM, A9WriteMHi], | |
2501 | (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL", | |
2502 | "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB", | |
2503 | "SMLALTT")>; | |
2504 | // FIXME: These instructions used to have NoItinerary. Just copied the one from above. | |
2505 | def : InstRW< [A9WriteM, A9WriteMHi], | |
2506 | (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX", | |
2507 | "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>; | |
2508 | ||
2509 | def : InstRW<[A9WriteM16, A9WriteM16Hi], | |
2510 | (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>; | |
2511 | def : InstRW<[A9WriteM16, A9WriteM16Hi], | |
2512 | (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>; | |
2513 | ||
2514 | def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>; | |
2515 | def : InstRW<[A9WriteLsi], (instregex "LDRrs")>; | |
2516 | def : InstRW<[A9WriteLb], | |
2517 | (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB", | |
2518 | "LDRH", "LDRSH", "LDRSB")>; | |
2519 | def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>; // "LDRrs" is already mapped to A9WriteLsi above. | |
2520 | ||
2521 | def : WriteRes<WriteDiv, []> { let Latency = 0; } | |
2522 | ||
2523 | def : WriteRes<WriteBr, [A9UnitB]>; | |
2524 | def : WriteRes<WriteBrL, [A9UnitB]>; | |
2525 | def : WriteRes<WriteBrTbl, [A9UnitB]>; | |
2526 | def : WriteRes<WritePreLd, []>; | |
2527 | def : SchedAlias<WriteCvtFP, A9WriteF>; | |
2528 | def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } | |
223e47cc | 2529 | } // SchedModel = CortexA9Model |