/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

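/* Rearm the hangcheck timer to check for GPU progress again in
 * roughly 100ms.
 */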
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

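/* Snapshot of the GPU and job state captured at hang time, held until
 * userspace collects it through vc4_get_hang_state_ioctl().
 */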
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_put_unlocked(state->bo[i]);

	kfree(state);
}

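/**
 * vc4_get_hang_state_ioctl() - Copies the saved hang state to userspace.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * If the caller's BO array is too small, only the required bo_count is
 * returned.  Otherwise the captured state is handed over, with fresh
 * GEM handles created for each of the BOs involved in the hung jobs.
 */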
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user(u64_to_user_ptr(get_state->bo),
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}

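/* Captures the BOs and V3D register state of the hung binner and
 * renderer jobs so userspace can retrieve them later for debugging.
 */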
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			bo = to_vc4_bo(&exec[i]->bo[j]->base);

			/* Retain BOs just in case they were marked purgeable.
			 * This prevents the BO from being purged before
			 * someone had a chance to dump the hang state.
			 */
			WARN_ON(!refcount_read(&bo->usecnt));
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			/* No need to retain BOs coming from the ->unref_list
			 * because they are naturally unpurgeable.
			 */
			drm_gem_object_get(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* j now counts every BO captured for this job, so the
		 * next job's entries start right after them.  (Using
		 * j + 1 here would leave a NULL hole and write one
		 * entry past the end of the array.)
		 */
		prev_idx = j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	/* We need to turn purgeable BOs into unpurgeable ones so that
	 * userspace has a chance to dump the hang state before the kernel
	 * decides to purge those BOs.
	 * Note that BO consistency at dump time cannot be guaranteed. For
	 * example, if the owner of these BOs decides to re-use them or mark
	 * them purgeable again there's nothing we can do to prevent it.
	 */
	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);

		if (bo->madv == __VC4_MADV_NOTSUPP)
			continue;

		mutex_lock(&bo->madv_lock);
		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
			bo->madv = VC4_MADV_WILLNEED;
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

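/* Resets the GPU by power cycling V3D through runtime PM (when it is
 * currently powered up) and reinitializing the interrupt and job state.
 */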
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

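/* Work item for recovering from a hang: saves the hang state for
 * debugging, then resets the GPU.  Run from a workqueue because
 * resetting can sleep, unlike the hangcheck timer that schedules it.
 */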
static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

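/* Timer callback that checks whether the binner or renderer has made
 * progress since the last check, rearming itself if so and scheduling
 * a GPU reset otherwise.
 */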
static void
vc4_hangcheck_elapsed(struct timer_list *t)
{
	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
	struct drm_device *dev = vc4->dev;
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

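/* Blocks until the given seqno has been finished by the GPU, the
 * timeout expires, or (if interruptible) a signal is delivered.
 * A timeout_ns of ~0ull means wait indefinitely.
 */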
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in
 * the hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

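/* Stamps the job's seqno on every BO it references and attaches the
 * job's fence to their reservation objects: shared for BOs the job
 * reads, exclusive for the BOs the render command list writes.
 */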
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;

		reservation_object_add_shared_fence(bo->resv, exec->fence);
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;

		reservation_object_add_excl_fence(bo->resv, exec->fence);
	}
}

static void
vc4_unlock_bo_reservations(struct drm_device *dev,
			   struct vc4_exec_info *exec,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int i;

	for (i = 0; i < exec->bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

		ww_mutex_unlock(&bo->resv->lock);
	}

	ww_acquire_fini(acquire_ctx);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to vc4, so we don't attach dma-buf fences to them.
 */
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;
	struct vc4_bo *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	if (contended_lock != -1) {
		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
						       acquire_ctx);
		if (ret) {
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		if (i == contended_lock)
			continue;

		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
		if (ret) {
			int j;

			for (j = 0; j < i; j++) {
				bo = to_vc4_bo(&exec->bo[j]->base);
				ww_mutex_unlock(&bo->resv->lock);
			}

			if (contended_lock != -1 && contended_lock >= i) {
				bo = to_vc4_bo(&exec->bo[contended_lock]->base);

				ww_mutex_unlock(&bo->resv->lock);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = reservation_object_reserve_shared(bo->resv);
		if (ret) {
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
		 struct ww_acquire_ctx *acquire_ctx)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;
	struct vc4_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	fence->dev = dev;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;

	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
	fence->seqno = exec->seqno;
	exec->fence = &fence->base;

	vc4_update_bo_seqnos(exec, seqno);

	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}

/**
 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kvmalloc_array(exec->bo_count,
				  sizeof(struct drm_gem_cma_object *),
				  GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			break;
		}

		drm_gem_object_get(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

	if (ret)
		goto fail_put_bo;

	for (i = 0; i < exec->bo_count; i++) {
		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
		if (ret)
			goto fail_dec_usecnt;
	}

	kvfree(handles);
	return 0;

fail_dec_usecnt:
	/* Decrease usecnt on acquired objects.
	 * We cannot rely on vc4_complete_exec() to release resources here,
	 * because vc4_complete_exec() has no information about which BO has
	 * had its ->usecnt incremented.
	 * To make things easier we just free everything explicitly and set
	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
	 * step.
	 */
	for (i--; i >= 0; i--)
		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));

fail_put_bo:
	/* Release any reference to acquired objects. */
	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
		drm_gem_object_put_unlocked(&exec->bo[i]->base);

fail:
	kvfree(handles);
	kvfree(exec->bo);
	exec->bo = NULL;
	return ret;
}

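/* Copies in the binner command list, shader records and uniforms from
 * userspace, runs them through the validators, and sets up the BO the
 * binner will actually execute from.
 */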
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
					  sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_DEBUG("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
	return ret;
}

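/* Tears down a job that has finished (or was force-completed by a GPU
 * reset): signals its fence if needed, drops BO references and use
 * counts, returns its bin slots, and releases its power reference.
 */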
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	unsigned i;

	/* If we got force-completed because of GPU reset rather than
	 * through our IRQ handler, signal the fence now.
	 */
	if (exec->fence) {
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
	}

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++) {
			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

			vc4_bo_dec_usecnt(bo);
			drm_gem_object_put_unlocked(&exec->bo[i]->base);
		}
		kvfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	/* Free up the allocation of any bin slots we used. */
	spin_lock_irqsave(&vc4->job_lock, irqflags);
	vc4->bin_alloc_used &= ~exec->bin_slots;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

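/* Completes every job on the job_done_list and fires any seqno
 * callbacks whose seqno has now been finished.
 */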
void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

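/* Registers a callback to be run (from a work item) once the given
 * seqno has been finished.  If it already has been, the callback is
 * scheduled immediately.
 */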
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

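/* Wait helper for the wait ioctls: on an interrupted wait, subtracts
 * the time already waited from *timeout_ns so that a restarted ioctl
 * doesn't wait longer than originally requested.
 */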
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put_unlocked(gem_obj);
	return ret;
}

/**
 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0) {
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0) {
			/* Drop the refcount we just took while we still
			 * hold power_lock, since it protects the count.
			 */
			vc4->power_refcount--;
			mutex_unlock(&vc4->power_lock);
			kfree(exec);
			return ret;
		}
	}
	mutex_unlock(&vc4->power_lock);

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);

	INIT_LIST_HEAD(&vc4->purgeable.list);
	mutex_init(&vc4->purgeable.lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->bin_bo) {
		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
		vc4->bin_bo = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}

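/**
 * vc4_gem_madvise_ioctl() - Updates the madvise (purgeable) state of a BO.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Marks a BO as DONTNEED (a candidate for purging under memory
 * pressure) or WILLNEED (needed again), moving it in or out of the
 * purgeable pool accordingly, and reports through args->retained
 * whether the BO's contents survived.
 */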
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_vc4_gem_madvise *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;
	int ret;

	switch (args->madv) {
	case VC4_MADV_DONTNEED:
	case VC4_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -ENOENT;
	}

	bo = to_vc4_bo(gem_obj);

	/* Only BOs exposed to userspace can be purged. */
	if (bo->madv == __VC4_MADV_NOTSUPP) {
		DRM_DEBUG("madvise not supported on this BO\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	/* Not sure it's safe to purge imported BOs. Let's just assume it's
	 * not until proven otherwise.
	 */
	if (gem_obj->import_attach) {
		DRM_DEBUG("madvise not supported on imported BOs\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	mutex_lock(&bo->madv_lock);

	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
	    !refcount_read(&bo->usecnt)) {
		/* If the BO is about to be marked as purgeable, is not used
		 * and is not already purgeable or purged, add it to the
		 * purgeable list.
		 */
		vc4_bo_add_to_purgeable_pool(bo);
	} else if (args->madv == VC4_MADV_WILLNEED &&
		   bo->madv == VC4_MADV_DONTNEED &&
		   !refcount_read(&bo->usecnt)) {
		/* The BO has not been purged yet, just remove it from
		 * the purgeable list.
		 */
		vc4_bo_remove_from_purgeable_pool(bo);
	}

	/* Save the purged state. */
	args->retained = bo->madv != __VC4_MADV_PURGED;

	/* Update internal madv state only if the bo was not purged. */
	if (bo->madv != __VC4_MADV_PURGED)
		bo->madv = args->madv;

	mutex_unlock(&bo->madv_lock);

	ret = 0;

out_put_gem:
	drm_gem_object_put_unlocked(gem_obj);

	return ret;
}