]>
Commit | Line | Data |
---|---|---|
e99716f1 BS |
1 | /* fuc microcode for nvc0 PGRAPH/GPC |
2 | * | |
3 | * Copyright 2011 Red Hat Inc. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the "Software"), | |
7 | * to deal in the Software without restriction, including without limitation | |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
9 | * and/or sell copies of the Software, and to permit persons to whom the | |
10 | * Software is furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
21 | * OTHER DEALINGS IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: Ben Skeggs | |
24 | */ | |
25 | ||
26 | /* TODO | |
27 | * - bracket certain functions with scratch writes, useful for debugging | |
28 | * - watchdog timer around ctx operations | |
29 | */ | |
30 | ||
31 | #ifdef INCLUDE_DATA | |
32 | gpc_id: .b32 0 | | // this GPC's index: init stores the value read from MYINDEX here
33 | gpc_mmio_list_head: .b32 0 | | // GPC mmio list head, copied by init (16-bit) from offset 4 of the matching chipset entry
34 | gpc_mmio_list_tail: .b32 0 | | // GPC mmio list tail, copied by init (16-bit) from offset 6 of the matching chipset entry
35 | ||
36 | tpc_count: .b32 0 | | // low 5 bits of the UNITS register, stored by init
37 | tpc_mask: .b32 0 | | // (1 << tpc_count) - 1, stored by init; passed to mmctx_xfer in $r15 by ctx_xfer
38 | tpc_mmio_list_head: .b32 0 | | // per-TPC mmio list head, copied by init (16-bit) from offset 8 of the chipset entry
39 | tpc_mmio_list_tail: .b32 0 | | // per-TPC mmio list tail, copied by init (16-bit) from offset 10 of the chipset entry
40 | ||
41 | cmd_queue: queue_init | | // command queue: ih adds entries with queue_put, main removes them with queue_get
42 | #endif | |
43 | ||
44 | #ifdef INCLUDE_CODE | |
45 | // Reports an exception to the host: writes the error code to a HUB scratch register, then writes 1 to HUB_CTXCTL_INTR_UP_SET | |
46 | // | |
47 | // In: $r15 error code (see nvc0.fuc). On return $r15 holds 1 (it is reused below and not saved); $r14 is preserved. | |
48 | // | |
49 | error: | |
50 | push $r14 | |
51 | mov $r14 -0x67ec // 0x9814; together with the sethi below this forms address 0x409814 | |
52 | sethi $r14 0x400000 | |
53 | call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code | |
54 | add b32 $r14 0x41c | | // next target is 0x41c above the scratch register; assumes nv_wr32 leaves $r14 intact - TODO confirm
55 | mov $r15 1 | |
56 | call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET | |
57 | pop $r14 | |
58 | ret | |
59 | ||
60 | // GPC fuc initialisation, executed by triggering ucode start, will | |
61 | // fall through to main loop after completion (there is no ret on the success path). | |
62 | // | |
63 | // Input: | |
64 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | |
65 | // CC_SCRATCH[1]: context base | |
66 | // | |
67 | // Output: | |
68 | // CC_SCRATCH[0]: | |
69 | // 31:31: set to signal completion | |
70 | // CC_SCRATCH[1]: | |
71 | // 31:0: GPC context size | |
72 | // Also fills in gpc_id, tpc_count, tpc_mask and the gpc/tpc mmio list head/tail words in data memory. | |
73 | init: | |
74 | clear b32 $r0 | | // $r0 is never written again in this file; it serves as the zero base for D[$r0 + #label] and I[$r0 + ...]
75 | mov $sp $r0 | |
76 | ||
77 | // enable fifo access | |
78 | mov $r1 0x1200 | |
79 | mov $r2 2 | |
80 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | |
81 | ||
82 | // setup i0 handler, and route all interrupts to it | |
83 | mov $r1 #ih | |
84 | mov $iv0 $r1 | |
85 | mov $r1 0x400 | |
86 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | |
87 | ||
88 | // enable fifo interrupt | |
89 | mov $r2 4 | |
90 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | |
91 | ||
92 | // enable interrupts | |
93 | bset $flags ie0 | |
94 | ||
95 | // figure out which GPC we are, and how many TPCs we have | |
96 | mov $r1 0x608 | |
97 | shl b32 $r1 6 | | // $r1 = 0x608 << 6; the same (value << 6) pattern builds the other I/O bases below
98 | iord $r2 I[$r1 + 0x000] // UNITS | |
99 | mov $r3 1 | |
100 | and $r2 0x1f | |
101 | shl b32 $r3 $r2 | |
102 | sub b32 $r3 1 | |
103 | st b32 D[$r0 + #tpc_count] $r2 | |
104 | st b32 D[$r0 + #tpc_mask] $r3 | |
105 | add b32 $r1 0x400 | |
106 | iord $r2 I[$r1 + 0x000] // MYINDEX | |
107 | st b32 D[$r0 + #gpc_id] $r2 | |
108 | ||
109 | // find context data for this chipset | |
110 | mov $r2 0x800 | |
111 | shl b32 $r2 6 | |
112 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | |
113 | mov $r1 #chipsets - 12 | | // start one 12-byte entry before the table; the loop below increments before it reads
114 | init_find_chipset: | |
115 | add b32 $r1 12 | |
116 | ld b32 $r3 D[$r1 + 0x00] | |
117 | cmpu b32 $r3 $r2 | |
118 | bra e #init_context | | // match: $r1 now points at this chipset's entry
119 | cmpu b32 $r3 0 | |
120 | bra ne #init_find_chipset | | // an entry whose first word is 0 ends the search
121 | // unknown chipset. NOTE(review): init is entered by ucode start (see header), $sp was zeroed above and nothing has been pushed by this code since, so it is unclear where this ret returns to; completion is never signalled on this path - confirm intended | |
122 | ret | |
123 | ||
124 | // initialise context base, and size tracking | |
125 | init_context: | |
126 | mov $r2 0x800 | |
127 | shl b32 $r2 6 | |
128 | iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base | |
129 | clear b32 $r3 // track GPC context size here | |
130 | ||
131 | // set mmctx base addresses now so we don't have to do it later, | |
132 | // they don't currently ever change | |
133 | mov $r4 0x700 | |
134 | shl b32 $r4 6 | |
135 | shr b32 $r5 $r2 8 | |
136 | iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE | |
137 | iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE | |
138 | ||
139 | // calculate GPC mmio context size, store the chipset-specific | |
140 | // mmio list pointers somewhere we can get at them later without | |
141 | // re-parsing the chipset list | |
142 | clear b32 $r14 | |
143 | clear b32 $r15 | |
144 | ld b16 $r14 D[$r1 + 4] | |
145 | ld b16 $r15 D[$r1 + 6] | |
146 | st b16 D[$r0 + #gpc_mmio_list_head] $r14 | | // 16-bit store into a .b32 word that ctx_xfer later reads back with ld b32
147 | st b16 D[$r0 + #gpc_mmio_list_tail] $r15 | |
148 | call #mmctx_size | | // the result is taken from $r15 below
149 | add b32 $r2 $r15 | | // $r2: running end address of the context data
150 | add b32 $r3 $r15 | | // $r3: running context size
151 | ||
152 | // calculate per-TPC mmio context size, store the list pointers | |
153 | ld b16 $r14 D[$r1 + 8] | |
154 | ld b16 $r15 D[$r1 + 10] | |
155 | st b16 D[$r0 + #tpc_mmio_list_head] $r14 | |
156 | st b16 D[$r0 + #tpc_mmio_list_tail] $r15 | |
157 | call #mmctx_size | |
158 | ld b32 $r14 D[$r0 + #tpc_count] | |
159 | mulu $r14 $r15 | | // tpc_count * per-TPC size
160 | add b32 $r2 $r14 | |
161 | add b32 $r3 $r14 | |
162 | ||
163 | // round up base/size to 256 byte boundary (for strand SWBASE); computed as ((x >> 8) + 1) << 8, so an already aligned value still advances by 256 | |
164 | add b32 $r4 0x1300 | | // relative to the 0x700 << 6 base set above; assumes mmctx_size leaves $r4 intact - TODO confirm
165 | shr b32 $r3 2 | | // size / 4 for the count register written below
166 | iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? | |
167 | shr b32 $r2 8 | |
168 | shr b32 $r3 6 | | // with the shr 2 above this is a total right shift of 8
169 | add b32 $r2 1 | |
170 | add b32 $r3 1 | |
171 | shl b32 $r2 8 | |
172 | shl b32 $r3 8 | |
173 | ||
174 | // calculate size of strand context data | |
175 | mov b32 $r15 $r2 | | // pass the rounded-up end address of the mmio data in $r15
176 | call #strand_ctx_init | |
177 | add b32 $r3 $r15 | | // strand_ctx_init's result is taken from $r15
178 | ||
179 | // save context size, and tell HUB we're done | |
180 | mov $r1 0x800 | |
181 | shl b32 $r1 6 | |
182 | iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size | |
183 | add b32 $r1 0x800 | | // target is 0x800 above the CC_SCRATCH base; per the comment below a write here ORs bits in - TODO confirm register name
184 | clear b32 $r2 | |
185 | bset $r2 31 | |
186 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | |
187 | ||
188 | // Main program loop, very simple, sleeps until woken up by the interrupt | |
189 | // handler, pulls a command from the queue and executes its handler. | |
190 | // Commands 0-3 are dispatched to ctx_xfer; anything else is reported through error as (command << 16) | E_BAD_COMMAND. Never returns. | |
191 | main: | |
192 | bset $flags $p0 | | // ih clears $p0 on its way out, which is what ends the sleep below
193 | sleep $p0 | |
194 | mov $r13 #cmd_queue | |
195 | call #queue_get | | // command is used from $r14 below; $r15 is passed on untouched to ctx_xfer
196 | bra $p1 #main | | // presumably queue_get sets $p1 when the queue was empty - confirm in queue_get
197 | ||
198 | // 0x0000-0x0003 are all context transfers | |
199 | cmpu b32 $r14 0x04 | |
200 | bra nc #main_not_ctx_xfer | |
201 | // fetch $flags and mask off $p1/$p2 (mask 0x0006 = bits 1 and 2) | |
202 | mov $r1 $flags | |
203 | mov $r2 0x0006 | |
204 | not b32 $r2 | |
205 | and $r1 $r2 | |
206 | // set $p1/$p2 according to transfer type: command bit 0 -> $p1, command bit 1 -> $p2 | |
207 | shl b32 $r14 1 | |
208 | or $r1 $r14 | |
209 | mov $flags $r1 | |
210 | // transfer context data | |
211 | call #ctx_xfer | |
212 | bra #main | |
213 | ||
214 | main_not_ctx_xfer: | |
215 | shl b32 $r15 $r14 16 | | // error code = (command << 16) | E_BAD_COMMAND
216 | or $r15 E_BAD_COMMAND | |
217 | call #error | |
218 | bra #main | |
219 | ||
220 | // interrupt handler (installed in $iv0 by init): queues a pending fifo command if there is one, acks the interrupt bits it read, and clears $p0 to wake main | |
221 | ih: | |
222 | push $r8 | | // $r8 is pushed first because it is used to carry $flags onto the stack
223 | mov $r8 $flags | |
224 | push $r8 | |
225 | push $r9 | | // saved although nothing visible here writes $r9; presumably for queue_put - confirm
226 | push $r10 | |
227 | push $r11 | |
228 | push $r13 | | // NOTE(review): $r12 is not saved; that is only safe if queue_put leaves it alone - confirm
229 | push $r14 | |
230 | push $r15 | |
231 | ||
232 | // incoming fifo command? | |
233 | iord $r10 I[$r0 + 0x200] // INTR (relies on $r0 == 0, as set up by init) | |
234 | and $r11 $r10 0x00000004 | | // bit 2: the fifo interrupt that init enabled (INTR_EN_SET = 4)
235 | bra e #ih_no_fifo | |
236 | // queue incoming fifo command for later processing | |
237 | mov $r11 0x1900 | |
238 | mov $r13 #cmd_queue | |
239 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | |
240 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | |
241 | call #queue_put | |
242 | add b32 $r11 0x400 | | // assumes queue_put leaves $r11 intact - TODO confirm
243 | mov $r14 1 | |
244 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | |
245 | ||
246 | // ack, and wake up main() | |
247 | ih_no_fifo: | |
248 | iowr I[$r0 + 0x100] $r10 // INTR_ACK: acks every bit that was read from INTR above | |
249 | ||
250 | pop $r15 | |
251 | pop $r14 | |
252 | pop $r13 | |
253 | pop $r11 | |
254 | pop $r10 | |
255 | pop $r9 | |
256 | pop $r8 | |
257 | mov $flags $r8 | |
258 | pop $r8 | |
259 | bclr $flags $p0 | | // cleared after $flags has been restored above, so the restore cannot undo it
260 | iret | |
261 | ||
262 | // Set this GPC's bit in HUB_BAR, used to signal completion of various | |
263 | // activities to the HUB fuc | |
264 | // Takes no register inputs (reads gpc_id from data memory); overwrites $r14 and $r15. | |
265 | hub_barrier_done: | |
266 | mov $r15 1 | |
267 | ld b32 $r14 D[$r0 + #gpc_id] | |
268 | shl b32 $r15 $r14 | | // $r15 = 1 << gpc_id
269 | mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET (low half 0x9418 here, high half from the sethi below) | |
270 | sethi $r14 0x400000 | |
271 | call #nv_wr32 | |
272 | ret | |
273 | ||
274 | // Disables various things, waits a bit, and re-enables them.. | |
275 | // | |
276 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | |
277 | // good description for the bits we turn off? Anyways, without this, | |
278 | // funny things happen. | |
279 | // Writes 0x020 to GPC_RED_SWITCH, spins for 8 loop iterations, then writes 0xa20. Overwrites $r14 and $r15. | |
280 | ctx_redswitch: | |
281 | mov $r14 0x614 | |
282 | shl b32 $r14 6 | |
283 | mov $r15 0x020 | |
284 | iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER | |
285 | mov $r15 8 | | // delay loop counter
286 | ctx_redswitch_delay: | |
287 | sub b32 $r15 1 | |
288 | bra ne #ctx_redswitch_delay | | // loop until the counter reaches zero
289 | mov $r15 0xa20 | |
290 | iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER | |
291 | ret | |
292 | ||
293 | // Transfer GPC context data between GPU and storage area | |
294 | // | |
295 | // In: $r15 context base address | |
296 | // $p1 clear on save, set on load | |
297 | // $p2 set if opposite direction done/will be done, so: | |
298 | // on save it means: "a load will follow this save" | |
299 | // on load it means: "a save preceded this load" | |
300 | // Order: write MEM_BASE, (load only) ctx_redswitch, start the strand save/load, GPC mmio list, per-TPC mmio list, wait for strands, optional strand command 0x0d, signal the HUB barrier. | |
301 | ctx_xfer: | |
302 | // set context base address | |
303 | mov $r1 0xa04 | |
304 | shl b32 $r1 6 | |
305 | iowr I[$r1 + 0x000] $r15// MEM_BASE | |
306 | bra not $p1 #ctx_xfer_not_load | | // save: skip the red switch toggle
307 | call #ctx_redswitch | | // overwrites $r14/$r15; the context base in $r15 has already been written above
308 | ctx_xfer_not_load: | |
309 | ||
310 | // strands | |
311 | mov $r1 0x4afc | |
312 | sethi $r1 0x20000 | |
313 | mov $r2 0xc | |
314 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | |
315 | call #strand_wait | |
316 | mov $r2 0x47fc | |
317 | sethi $r2 0x20000 | |
318 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | |
319 | xbit $r2 $flags $p1 | | // $r2 = $p1: 0 on save, 1 on load
320 | add b32 $r2 3 | |
321 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | |
322 | ||
323 | // mmio context | |
324 | xbit $r10 $flags $p1 // direction | |
325 | or $r10 2 // first | |
326 | mov $r11 0x0000 | |
327 | sethi $r11 0x500000 | |
328 | ld b32 $r12 D[$r0 + #gpc_id] | |
329 | shl b32 $r12 15 | | // gpc_id * 0x8000
330 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn | |
331 | ld b32 $r12 D[$r0 + #gpc_mmio_list_head] | |
332 | ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] | |
333 | mov $r14 0 // not multi | |
334 | call #mmctx_xfer | |
335 | ||
336 | // per-TPC mmio context | |
337 | xbit $r10 $flags $p1 // direction | |
338 | or $r10 4 // last | |
339 | mov $r11 0x4000 | |
340 | sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 | |
341 | ld b32 $r12 D[$r0 + #gpc_id] | |
342 | shl b32 $r12 15 | |
343 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 | |
344 | ld b32 $r12 D[$r0 + #tpc_mmio_list_head] | |
345 | ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] | |
346 | ld b32 $r15 D[$r0 + #tpc_mask] | | // one bit per TPC, computed by init as (1 << tpc_count) - 1
347 | mov $r14 0x800 // stride = 0x800 | |
348 | call #mmctx_xfer | |
349 | ||
350 | // wait for strands to finish | |
351 | call #strand_wait | |
352 | ||
353 | // if load, or a save without a load following, do some | |
354 | // unknown stuff that's done after finishing a block of | |
355 | // strand commands. NOTE(review): as coded, a save runs this only when $p2 is SET (the second branch below skips it when $p2 is clear), which per the header means a load WILL follow - the wording above says the opposite; confirm which is intended | |
356 | bra $p1 #ctx_xfer_post | |
357 | bra not $p2 #ctx_xfer_done | |
358 | ctx_xfer_post: | |
359 | mov $r1 0x4afc | |
360 | sethi $r1 0x20000 | |
361 | mov $r2 0xd | |
362 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d | |
363 | call #strand_wait | |
364 | ||
365 | // mark completion in HUB's barrier | |
366 | ctx_xfer_done: | |
367 | call #hub_barrier_done | |
368 | ret | |
369 | #endif |