/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

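/* Re-arms the hangcheck timer: vc4_hangcheck_elapsed() will run
 * roughly 100ms from now to check whether the GPU has made progress.
 */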
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

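/* State snapshotted at GPU hang time, handed to userspace through
 * the GET_HANG_STATE ioctl for debugging.
 */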
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

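/* Drops the references taken on the hang state's BOs and frees it,
 * either once the state has been handed to userspace or on teardown.
 */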
static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

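/* Returns the hang state captured by the most recent GPU reset to
 * userspace.
 *
 * This follows the usual two-call ioctl pattern: if the array
 * userspace supplied is too small, only the required bo_count is
 * written back.  A hypothetical caller sketch (not part of this
 * file) would look roughly like:
 *
 *	struct drm_vc4_get_hang_state get = { 0 };
 *
 *	drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get);
 *	bos = calloc(get.bo_count, sizeof(struct drm_vc4_get_hang_state_bo));
 *	get.bo = (uintptr_t)bos;
 *	drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get);
 *
 * Note that the hang state is consumed by a successful second call:
 * the kernel drops its copy once the BO handles have been created.
 */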
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		/* Don't leak the handles created before the failure. */
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

	kfree(bo_state);

err_free:
	vc4_free_hang_state(dev, kernel_state);

	return ret;
}

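/* Captures the state of the binner and renderer jobs that were on
 * the hardware at hang time, along with the V3D registers, so that
 * userspace can fetch it later through the GET_HANG_STATE ioctl.
 */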
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		/* Nothing to snapshot; don't leak the allocation. */
		kfree(kernel_state);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* j counted every BO recorded for this exec, so the
		 * next exec's entries start prev_idx + j into the
		 * array.
		 */
		prev_idx += j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

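/* Power-cycles the V3D block through runtime PM, resets its
 * interrupt state, and re-arms the hangcheck for whatever job gets
 * kicked off next.
 */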
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

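/* Timer callback fired roughly every 100ms while jobs are on the
 * hardware.  If neither the binner nor the renderer has advanced
 * since the last check, the GPU is assumed hung and a reset is
 * scheduled.
 */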
static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

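/* Blocks until the GPU has signalled completion of the given seqno,
 * the timeout expires (-ETIME), or, if interruptible, a signal
 * arrives (-ERESTARTSYS).  A timeout_ns of ~0ull waits forever.
 */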
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches. These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in
 * the hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

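/* Stamps every BO used by the job with the job's seqno, so waiters
 * on those BOs (e.g. vc4_wait_bo_ioctl()) know which job to wait
 * for.  RCL write targets additionally get write_seqno, used for
 * read-after-write ordering between jobs.
 */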
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}

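/* Copies in the user's binner command list, shader records, and
 * uniforms, then allocates the BO that the validated copies will
 * live in.  The temporary buffer is laid out as:
 *
 *	[bin CL][shader recs (16-byte aligned)][uniforms][shader state]
 *
 * and the first three regions are written into exec_bo as they are
 * validated.
 */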
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	drm_free_large(temp);
	return ret;
}

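/* Drops the job's BO references, frees its temporary allocations,
 * and releases the power reference taken at submit time.  Called
 * from the job-done workqueue and from the submit error path.
 */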
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

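/* Registers a callback to fire (from workqueue context) once
 * finished_seqno reaches the given seqno.  If it already has, the
 * work is scheduled immediately.
 */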
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

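/* Waits for completion of the most recent job to use the given BO.
 * On interruption, the remaining timeout is written back to
 * args->timeout_ns so userspace can simply restart the ioctl.
 */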
int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

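/* One-time setup of the job lists, locks, hangcheck timer, and
 * completion work items at driver load.
 */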
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}