/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"
37 vc4_queue_hangcheck(struct drm_device
*dev
)
39 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
41 mod_timer(&vc4
->hangcheck
.timer
,
42 round_jiffies_up(jiffies
+ msecs_to_jiffies(100)));
45 struct vc4_hang_state
{
46 struct drm_vc4_get_hang_state user_state
;
49 struct drm_gem_object
**bo
;
53 vc4_free_hang_state(struct drm_device
*dev
, struct vc4_hang_state
*state
)
57 for (i
= 0; i
< state
->user_state
.bo_count
; i
++)
58 drm_gem_object_put_unlocked(state
->bo
[i
]);
64 vc4_get_hang_state_ioctl(struct drm_device
*dev
, void *data
,
65 struct drm_file
*file_priv
)
67 struct drm_vc4_get_hang_state
*get_state
= data
;
68 struct drm_vc4_get_hang_state_bo
*bo_state
;
69 struct vc4_hang_state
*kernel_state
;
70 struct drm_vc4_get_hang_state
*state
;
71 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
72 unsigned long irqflags
;
76 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
77 kernel_state
= vc4
->hang_state
;
79 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
82 state
= &kernel_state
->user_state
;
84 /* If the user's array isn't big enough, just return the
85 * required array size.
87 if (get_state
->bo_count
< state
->bo_count
) {
88 get_state
->bo_count
= state
->bo_count
;
89 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
93 vc4
->hang_state
= NULL
;
94 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
96 /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
97 state
->bo
= get_state
->bo
;
98 memcpy(get_state
, state
, sizeof(*state
));
100 bo_state
= kcalloc(state
->bo_count
, sizeof(*bo_state
), GFP_KERNEL
);
106 for (i
= 0; i
< state
->bo_count
; i
++) {
107 struct vc4_bo
*vc4_bo
= to_vc4_bo(kernel_state
->bo
[i
]);
110 ret
= drm_gem_handle_create(file_priv
, kernel_state
->bo
[i
],
115 goto err_delete_handle
;
117 bo_state
[i
].handle
= handle
;
118 bo_state
[i
].paddr
= vc4_bo
->base
.paddr
;
119 bo_state
[i
].size
= vc4_bo
->base
.base
.size
;
122 if (copy_to_user(u64_to_user_ptr(get_state
->bo
),
124 state
->bo_count
* sizeof(*bo_state
)))
129 for (i
= 0; i
< state
->bo_count
; i
++)
130 drm_gem_handle_delete(file_priv
, bo_state
[i
].handle
);
134 vc4_free_hang_state(dev
, kernel_state
);
141 vc4_save_hang_state(struct drm_device
*dev
)
143 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
144 struct drm_vc4_get_hang_state
*state
;
145 struct vc4_hang_state
*kernel_state
;
146 struct vc4_exec_info
*exec
[2];
148 unsigned long irqflags
;
149 unsigned int i
, j
, unref_list_count
, prev_idx
;
151 kernel_state
= kcalloc(1, sizeof(*kernel_state
), GFP_KERNEL
);
155 state
= &kernel_state
->user_state
;
157 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
158 exec
[0] = vc4_first_bin_job(vc4
);
159 exec
[1] = vc4_first_render_job(vc4
);
160 if (!exec
[0] && !exec
[1]) {
161 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
165 /* Get the bos from both binner and renderer into hang state. */
167 for (i
= 0; i
< 2; i
++) {
171 unref_list_count
= 0;
172 list_for_each_entry(bo
, &exec
[i
]->unref_list
, unref_head
)
174 state
->bo_count
+= exec
[i
]->bo_count
+ unref_list_count
;
177 kernel_state
->bo
= kcalloc(state
->bo_count
,
178 sizeof(*kernel_state
->bo
), GFP_ATOMIC
);
180 if (!kernel_state
->bo
) {
181 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
186 for (i
= 0; i
< 2; i
++) {
190 for (j
= 0; j
< exec
[i
]->bo_count
; j
++) {
191 bo
= to_vc4_bo(&exec
[i
]->bo
[j
]->base
);
193 /* Retain BOs just in case they were marked purgeable.
194 * This prevents the BO from being purged before
195 * someone had a chance to dump the hang state.
197 WARN_ON(!refcount_read(&bo
->usecnt
));
198 refcount_inc(&bo
->usecnt
);
199 drm_gem_object_get(&exec
[i
]->bo
[j
]->base
);
200 kernel_state
->bo
[j
+ prev_idx
] = &exec
[i
]->bo
[j
]->base
;
203 list_for_each_entry(bo
, &exec
[i
]->unref_list
, unref_head
) {
204 /* No need to retain BOs coming from the ->unref_list
205 * because they are naturally unpurgeable.
207 drm_gem_object_get(&bo
->base
.base
);
208 kernel_state
->bo
[j
+ prev_idx
] = &bo
->base
.base
;
215 state
->start_bin
= exec
[0]->ct0ca
;
217 state
->start_render
= exec
[1]->ct1ca
;
219 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
221 state
->ct0ca
= V3D_READ(V3D_CTNCA(0));
222 state
->ct0ea
= V3D_READ(V3D_CTNEA(0));
224 state
->ct1ca
= V3D_READ(V3D_CTNCA(1));
225 state
->ct1ea
= V3D_READ(V3D_CTNEA(1));
227 state
->ct0cs
= V3D_READ(V3D_CTNCS(0));
228 state
->ct1cs
= V3D_READ(V3D_CTNCS(1));
230 state
->ct0ra0
= V3D_READ(V3D_CT00RA0
);
231 state
->ct1ra0
= V3D_READ(V3D_CT01RA0
);
233 state
->bpca
= V3D_READ(V3D_BPCA
);
234 state
->bpcs
= V3D_READ(V3D_BPCS
);
235 state
->bpoa
= V3D_READ(V3D_BPOA
);
236 state
->bpos
= V3D_READ(V3D_BPOS
);
238 state
->vpmbase
= V3D_READ(V3D_VPMBASE
);
240 state
->dbge
= V3D_READ(V3D_DBGE
);
241 state
->fdbgo
= V3D_READ(V3D_FDBGO
);
242 state
->fdbgb
= V3D_READ(V3D_FDBGB
);
243 state
->fdbgr
= V3D_READ(V3D_FDBGR
);
244 state
->fdbgs
= V3D_READ(V3D_FDBGS
);
245 state
->errstat
= V3D_READ(V3D_ERRSTAT
);
247 /* We need to turn purgeable BOs into unpurgeable ones so that
248 * userspace has a chance to dump the hang state before the kernel
249 * decides to purge those BOs.
250 * Note that BO consistency at dump time cannot be guaranteed. For
251 * example, if the owner of these BOs decides to re-use them or mark
252 * them purgeable again there's nothing we can do to prevent it.
254 for (i
= 0; i
< kernel_state
->user_state
.bo_count
; i
++) {
255 struct vc4_bo
*bo
= to_vc4_bo(kernel_state
->bo
[i
]);
257 if (bo
->madv
== __VC4_MADV_NOTSUPP
)
260 mutex_lock(&bo
->madv_lock
);
261 if (!WARN_ON(bo
->madv
== __VC4_MADV_PURGED
))
262 bo
->madv
= VC4_MADV_WILLNEED
;
263 refcount_dec(&bo
->usecnt
);
264 mutex_unlock(&bo
->madv_lock
);
267 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
268 if (vc4
->hang_state
) {
269 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
270 vc4_free_hang_state(dev
, kernel_state
);
272 vc4
->hang_state
= kernel_state
;
273 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
278 vc4_reset(struct drm_device
*dev
)
280 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
282 DRM_INFO("Resetting GPU.\n");
284 mutex_lock(&vc4
->power_lock
);
285 if (vc4
->power_refcount
) {
286 /* Power the device off and back on the by dropping the
287 * reference on runtime PM.
289 pm_runtime_put_sync_suspend(&vc4
->v3d
->pdev
->dev
);
290 pm_runtime_get_sync(&vc4
->v3d
->pdev
->dev
);
292 mutex_unlock(&vc4
->power_lock
);
296 /* Rearm the hangcheck -- another job might have been waiting
297 * for our hung one to get kicked off, and vc4_irq_reset()
298 * would have started it.
300 vc4_queue_hangcheck(dev
);
304 vc4_reset_work(struct work_struct
*work
)
306 struct vc4_dev
*vc4
=
307 container_of(work
, struct vc4_dev
, hangcheck
.reset_work
);
309 vc4_save_hang_state(vc4
->dev
);
315 vc4_hangcheck_elapsed(struct timer_list
*t
)
317 struct vc4_dev
*vc4
= from_timer(vc4
, t
, hangcheck
.timer
);
318 struct drm_device
*dev
= vc4
->dev
;
319 uint32_t ct0ca
, ct1ca
;
320 unsigned long irqflags
;
321 struct vc4_exec_info
*bin_exec
, *render_exec
;
323 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
325 bin_exec
= vc4_first_bin_job(vc4
);
326 render_exec
= vc4_first_render_job(vc4
);
328 /* If idle, we can stop watching for hangs. */
329 if (!bin_exec
&& !render_exec
) {
330 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
334 ct0ca
= V3D_READ(V3D_CTNCA(0));
335 ct1ca
= V3D_READ(V3D_CTNCA(1));
337 /* If we've made any progress in execution, rearm the timer
340 if ((bin_exec
&& ct0ca
!= bin_exec
->last_ct0ca
) ||
341 (render_exec
&& ct1ca
!= render_exec
->last_ct1ca
)) {
343 bin_exec
->last_ct0ca
= ct0ca
;
345 render_exec
->last_ct1ca
= ct1ca
;
346 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
347 vc4_queue_hangcheck(dev
);
351 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
353 /* We've gone too long with no progress, reset. This has to
354 * be done from a work struct, since resetting can sleep and
355 * this timer hook isn't allowed to.
357 schedule_work(&vc4
->hangcheck
.reset_work
);
361 submit_cl(struct drm_device
*dev
, uint32_t thread
, uint32_t start
, uint32_t end
)
363 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
365 /* Set the current and end address of the control list.
366 * Writing the end register is what starts the job.
368 V3D_WRITE(V3D_CTNCA(thread
), start
);
369 V3D_WRITE(V3D_CTNEA(thread
), end
);
373 vc4_wait_for_seqno(struct drm_device
*dev
, uint64_t seqno
, uint64_t timeout_ns
,
376 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
378 unsigned long timeout_expire
;
381 if (vc4
->finished_seqno
>= seqno
)
387 timeout_expire
= jiffies
+ nsecs_to_jiffies(timeout_ns
);
389 trace_vc4_wait_for_seqno_begin(dev
, seqno
, timeout_ns
);
391 prepare_to_wait(&vc4
->job_wait_queue
, &wait
,
392 interruptible
? TASK_INTERRUPTIBLE
:
393 TASK_UNINTERRUPTIBLE
);
395 if (interruptible
&& signal_pending(current
)) {
400 if (vc4
->finished_seqno
>= seqno
)
403 if (timeout_ns
!= ~0ull) {
404 if (time_after_eq(jiffies
, timeout_expire
)) {
408 schedule_timeout(timeout_expire
- jiffies
);
414 finish_wait(&vc4
->job_wait_queue
, &wait
);
415 trace_vc4_wait_for_seqno_end(dev
, seqno
);
421 vc4_flush_caches(struct drm_device
*dev
)
423 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
425 /* Flush the GPU L2 caches. These caches sit on top of system
426 * L3 (the 128kb or so shared with the CPU), and are
427 * non-allocating in the L3.
429 V3D_WRITE(V3D_L2CACTL
,
432 V3D_WRITE(V3D_SLCACTL
,
433 VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC
) |
434 VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC
) |
435 VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC
) |
436 VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC
));
439 /* Sets the registers for the next job to be actually be executed in
442 * The job_lock should be held during this.
445 vc4_submit_next_bin_job(struct drm_device
*dev
)
447 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
448 struct vc4_exec_info
*exec
;
451 exec
= vc4_first_bin_job(vc4
);
455 vc4_flush_caches(dev
);
457 /* Either put the job in the binner if it uses the binner, or
458 * immediately move it to the to-be-rendered queue.
460 if (exec
->ct0ca
!= exec
->ct0ea
) {
461 submit_cl(dev
, 0, exec
->ct0ca
, exec
->ct0ea
);
463 vc4_move_job_to_render(dev
, exec
);
469 vc4_submit_next_render_job(struct drm_device
*dev
)
471 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
472 struct vc4_exec_info
*exec
= vc4_first_render_job(vc4
);
477 submit_cl(dev
, 1, exec
->ct1ca
, exec
->ct1ea
);
481 vc4_move_job_to_render(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
483 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
484 bool was_empty
= list_empty(&vc4
->render_job_list
);
486 list_move_tail(&exec
->head
, &vc4
->render_job_list
);
488 vc4_submit_next_render_job(dev
);
492 vc4_update_bo_seqnos(struct vc4_exec_info
*exec
, uint64_t seqno
)
497 for (i
= 0; i
< exec
->bo_count
; i
++) {
498 bo
= to_vc4_bo(&exec
->bo
[i
]->base
);
501 reservation_object_add_shared_fence(bo
->resv
, exec
->fence
);
504 list_for_each_entry(bo
, &exec
->unref_list
, unref_head
) {
508 for (i
= 0; i
< exec
->rcl_write_bo_count
; i
++) {
509 bo
= to_vc4_bo(&exec
->rcl_write_bo
[i
]->base
);
510 bo
->write_seqno
= seqno
;
512 reservation_object_add_excl_fence(bo
->resv
, exec
->fence
);
517 vc4_unlock_bo_reservations(struct drm_device
*dev
,
518 struct vc4_exec_info
*exec
,
519 struct ww_acquire_ctx
*acquire_ctx
)
523 for (i
= 0; i
< exec
->bo_count
; i
++) {
524 struct vc4_bo
*bo
= to_vc4_bo(&exec
->bo
[i
]->base
);
526 ww_mutex_unlock(&bo
->resv
->lock
);
529 ww_acquire_fini(acquire_ctx
);
532 /* Takes the reservation lock on all the BOs being referenced, so that
533 * at queue submit time we can update the reservations.
535 * We don't lock the RCL the tile alloc/state BOs, or overflow memory
536 * (all of which are on exec->unref_list). They're entirely private
537 * to vc4, so we don't attach dma-buf fences to them.
540 vc4_lock_bo_reservations(struct drm_device
*dev
,
541 struct vc4_exec_info
*exec
,
542 struct ww_acquire_ctx
*acquire_ctx
)
544 int contended_lock
= -1;
548 ww_acquire_init(acquire_ctx
, &reservation_ww_class
);
551 if (contended_lock
!= -1) {
552 bo
= to_vc4_bo(&exec
->bo
[contended_lock
]->base
);
553 ret
= ww_mutex_lock_slow_interruptible(&bo
->resv
->lock
,
556 ww_acquire_done(acquire_ctx
);
561 for (i
= 0; i
< exec
->bo_count
; i
++) {
562 if (i
== contended_lock
)
565 bo
= to_vc4_bo(&exec
->bo
[i
]->base
);
567 ret
= ww_mutex_lock_interruptible(&bo
->resv
->lock
, acquire_ctx
);
571 for (j
= 0; j
< i
; j
++) {
572 bo
= to_vc4_bo(&exec
->bo
[j
]->base
);
573 ww_mutex_unlock(&bo
->resv
->lock
);
576 if (contended_lock
!= -1 && contended_lock
>= i
) {
577 bo
= to_vc4_bo(&exec
->bo
[contended_lock
]->base
);
579 ww_mutex_unlock(&bo
->resv
->lock
);
582 if (ret
== -EDEADLK
) {
587 ww_acquire_done(acquire_ctx
);
592 ww_acquire_done(acquire_ctx
);
594 /* Reserve space for our shared (read-only) fence references,
595 * before we commit the CL to the hardware.
597 for (i
= 0; i
< exec
->bo_count
; i
++) {
598 bo
= to_vc4_bo(&exec
->bo
[i
]->base
);
600 ret
= reservation_object_reserve_shared(bo
->resv
);
602 vc4_unlock_bo_reservations(dev
, exec
, acquire_ctx
);
610 /* Queues a struct vc4_exec_info for execution. If no job is
611 * currently executing, then submits it.
613 * Unlike most GPUs, our hardware only handles one command list at a
614 * time. To queue multiple jobs at once, we'd need to edit the
615 * previous command list to have a jump to the new one at the end, and
616 * then bump the end address. That's a change for a later date,
620 vc4_queue_submit(struct drm_device
*dev
, struct vc4_exec_info
*exec
,
621 struct ww_acquire_ctx
*acquire_ctx
)
623 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
625 unsigned long irqflags
;
626 struct vc4_fence
*fence
;
628 fence
= kzalloc(sizeof(*fence
), GFP_KERNEL
);
633 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
635 seqno
= ++vc4
->emit_seqno
;
638 dma_fence_init(&fence
->base
, &vc4_fence_ops
, &vc4
->job_lock
,
639 vc4
->dma_fence_context
, exec
->seqno
);
640 fence
->seqno
= exec
->seqno
;
641 exec
->fence
= &fence
->base
;
643 vc4_update_bo_seqnos(exec
, seqno
);
645 vc4_unlock_bo_reservations(dev
, exec
, acquire_ctx
);
647 list_add_tail(&exec
->head
, &vc4
->bin_job_list
);
649 /* If no job was executing, kick ours off. Otherwise, it'll
650 * get started when the previous job's flush done interrupt
653 if (vc4_first_bin_job(vc4
) == exec
) {
654 vc4_submit_next_bin_job(dev
);
655 vc4_queue_hangcheck(dev
);
658 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
664 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
665 * referenced by the job.
667 * @file_priv: DRM file for this fd
668 * @exec: V3D job being set up
670 * The command validator needs to reference BOs by their index within
671 * the submitted job's BO list. This does the validation of the job's
672 * BO list and reference counting for the lifetime of the job.
675 vc4_cl_lookup_bos(struct drm_device
*dev
,
676 struct drm_file
*file_priv
,
677 struct vc4_exec_info
*exec
)
679 struct drm_vc4_submit_cl
*args
= exec
->args
;
684 exec
->bo_count
= args
->bo_handle_count
;
686 if (!exec
->bo_count
) {
687 /* See comment on bo_index for why we have to check
690 DRM_DEBUG("Rendering requires BOs to validate\n");
694 exec
->bo
= kvmalloc_array(exec
->bo_count
,
695 sizeof(struct drm_gem_cma_object
*),
696 GFP_KERNEL
| __GFP_ZERO
);
698 DRM_ERROR("Failed to allocate validated BO pointers\n");
702 handles
= kvmalloc_array(exec
->bo_count
, sizeof(uint32_t), GFP_KERNEL
);
705 DRM_ERROR("Failed to allocate incoming GEM handles\n");
709 if (copy_from_user(handles
, u64_to_user_ptr(args
->bo_handles
),
710 exec
->bo_count
* sizeof(uint32_t))) {
712 DRM_ERROR("Failed to copy in GEM handles\n");
716 spin_lock(&file_priv
->table_lock
);
717 for (i
= 0; i
< exec
->bo_count
; i
++) {
718 struct drm_gem_object
*bo
= idr_find(&file_priv
->object_idr
,
721 DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
727 drm_gem_object_get(bo
);
728 exec
->bo
[i
] = (struct drm_gem_cma_object
*)bo
;
730 spin_unlock(&file_priv
->table_lock
);
735 for (i
= 0; i
< exec
->bo_count
; i
++) {
736 ret
= vc4_bo_inc_usecnt(to_vc4_bo(&exec
->bo
[i
]->base
));
738 goto fail_dec_usecnt
;
745 /* Decrease usecnt on acquired objects.
746 * We cannot rely on vc4_complete_exec() to release resources here,
747 * because vc4_complete_exec() has no information about which BO has
748 * had its ->usecnt incremented.
749 * To make things easier we just free everything explicitly and set
750 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
753 for (i
-- ; i
>= 0; i
--)
754 vc4_bo_dec_usecnt(to_vc4_bo(&exec
->bo
[i
]->base
));
757 /* Release any reference to acquired objects. */
758 for (i
= 0; i
< exec
->bo_count
&& exec
->bo
[i
]; i
++)
759 drm_gem_object_put_unlocked(&exec
->bo
[i
]->base
);
769 vc4_get_bcl(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
771 struct drm_vc4_submit_cl
*args
= exec
->args
;
775 uint32_t bin_offset
= 0;
776 uint32_t shader_rec_offset
= roundup(bin_offset
+ args
->bin_cl_size
,
778 uint32_t uniforms_offset
= shader_rec_offset
+ args
->shader_rec_size
;
779 uint32_t exec_size
= uniforms_offset
+ args
->uniforms_size
;
780 uint32_t temp_size
= exec_size
+ (sizeof(struct vc4_shader_state
) *
781 args
->shader_rec_count
);
784 if (shader_rec_offset
< args
->bin_cl_size
||
785 uniforms_offset
< shader_rec_offset
||
786 exec_size
< uniforms_offset
||
787 args
->shader_rec_count
>= (UINT_MAX
/
788 sizeof(struct vc4_shader_state
)) ||
789 temp_size
< exec_size
) {
790 DRM_DEBUG("overflow in exec arguments\n");
795 /* Allocate space where we'll store the copied in user command lists
796 * and shader records.
798 * We don't just copy directly into the BOs because we need to
799 * read the contents back for validation, and I think the
800 * bo->vaddr is uncached access.
802 temp
= kvmalloc_array(temp_size
, 1, GFP_KERNEL
);
804 DRM_ERROR("Failed to allocate storage for copying "
805 "in bin/render CLs.\n");
809 bin
= temp
+ bin_offset
;
810 exec
->shader_rec_u
= temp
+ shader_rec_offset
;
811 exec
->uniforms_u
= temp
+ uniforms_offset
;
812 exec
->shader_state
= temp
+ exec_size
;
813 exec
->shader_state_size
= args
->shader_rec_count
;
815 if (copy_from_user(bin
,
816 u64_to_user_ptr(args
->bin_cl
),
817 args
->bin_cl_size
)) {
822 if (copy_from_user(exec
->shader_rec_u
,
823 u64_to_user_ptr(args
->shader_rec
),
824 args
->shader_rec_size
)) {
829 if (copy_from_user(exec
->uniforms_u
,
830 u64_to_user_ptr(args
->uniforms
),
831 args
->uniforms_size
)) {
836 bo
= vc4_bo_create(dev
, exec_size
, true, VC4_BO_TYPE_BCL
);
838 DRM_ERROR("Couldn't allocate BO for binning\n");
842 exec
->exec_bo
= &bo
->base
;
844 list_add_tail(&to_vc4_bo(&exec
->exec_bo
->base
)->unref_head
,
847 exec
->ct0ca
= exec
->exec_bo
->paddr
+ bin_offset
;
851 exec
->shader_rec_v
= exec
->exec_bo
->vaddr
+ shader_rec_offset
;
852 exec
->shader_rec_p
= exec
->exec_bo
->paddr
+ shader_rec_offset
;
853 exec
->shader_rec_size
= args
->shader_rec_size
;
855 exec
->uniforms_v
= exec
->exec_bo
->vaddr
+ uniforms_offset
;
856 exec
->uniforms_p
= exec
->exec_bo
->paddr
+ uniforms_offset
;
857 exec
->uniforms_size
= args
->uniforms_size
;
859 ret
= vc4_validate_bin_cl(dev
,
860 exec
->exec_bo
->vaddr
+ bin_offset
,
866 ret
= vc4_validate_shader_recs(dev
, exec
);
870 /* Block waiting on any previous rendering into the CS's VBO,
871 * IB, or textures, so that pixels are actually written by the
872 * time we try to read them.
874 ret
= vc4_wait_for_seqno(dev
, exec
->bin_dep_seqno
, ~0ull, true);
882 vc4_complete_exec(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
884 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
885 unsigned long irqflags
;
888 /* If we got force-completed because of GPU reset rather than
889 * through our IRQ handler, signal the fence now.
892 dma_fence_signal(exec
->fence
);
893 dma_fence_put(exec
->fence
);
897 for (i
= 0; i
< exec
->bo_count
; i
++) {
898 struct vc4_bo
*bo
= to_vc4_bo(&exec
->bo
[i
]->base
);
900 vc4_bo_dec_usecnt(bo
);
901 drm_gem_object_put_unlocked(&exec
->bo
[i
]->base
);
906 while (!list_empty(&exec
->unref_list
)) {
907 struct vc4_bo
*bo
= list_first_entry(&exec
->unref_list
,
908 struct vc4_bo
, unref_head
);
909 list_del(&bo
->unref_head
);
910 drm_gem_object_put_unlocked(&bo
->base
.base
);
913 /* Free up the allocation of any bin slots we used. */
914 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
915 vc4
->bin_alloc_used
&= ~exec
->bin_slots
;
916 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
918 mutex_lock(&vc4
->power_lock
);
919 if (--vc4
->power_refcount
== 0) {
920 pm_runtime_mark_last_busy(&vc4
->v3d
->pdev
->dev
);
921 pm_runtime_put_autosuspend(&vc4
->v3d
->pdev
->dev
);
923 mutex_unlock(&vc4
->power_lock
);
929 vc4_job_handle_completed(struct vc4_dev
*vc4
)
931 unsigned long irqflags
;
932 struct vc4_seqno_cb
*cb
, *cb_temp
;
934 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
935 while (!list_empty(&vc4
->job_done_list
)) {
936 struct vc4_exec_info
*exec
=
937 list_first_entry(&vc4
->job_done_list
,
938 struct vc4_exec_info
, head
);
939 list_del(&exec
->head
);
941 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
942 vc4_complete_exec(vc4
->dev
, exec
);
943 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
946 list_for_each_entry_safe(cb
, cb_temp
, &vc4
->seqno_cb_list
, work
.entry
) {
947 if (cb
->seqno
<= vc4
->finished_seqno
) {
948 list_del_init(&cb
->work
.entry
);
949 schedule_work(&cb
->work
);
953 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
956 static void vc4_seqno_cb_work(struct work_struct
*work
)
958 struct vc4_seqno_cb
*cb
= container_of(work
, struct vc4_seqno_cb
, work
);
963 int vc4_queue_seqno_cb(struct drm_device
*dev
,
964 struct vc4_seqno_cb
*cb
, uint64_t seqno
,
965 void (*func
)(struct vc4_seqno_cb
*cb
))
967 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
969 unsigned long irqflags
;
972 INIT_WORK(&cb
->work
, vc4_seqno_cb_work
);
974 spin_lock_irqsave(&vc4
->job_lock
, irqflags
);
975 if (seqno
> vc4
->finished_seqno
) {
977 list_add_tail(&cb
->work
.entry
, &vc4
->seqno_cb_list
);
979 schedule_work(&cb
->work
);
981 spin_unlock_irqrestore(&vc4
->job_lock
, irqflags
);
986 /* Scheduled when any job has been completed, this walks the list of
987 * jobs that had completed and unrefs their BOs and frees their exec
991 vc4_job_done_work(struct work_struct
*work
)
993 struct vc4_dev
*vc4
=
994 container_of(work
, struct vc4_dev
, job_done_work
);
996 vc4_job_handle_completed(vc4
);
1000 vc4_wait_for_seqno_ioctl_helper(struct drm_device
*dev
,
1002 uint64_t *timeout_ns
)
1004 unsigned long start
= jiffies
;
1005 int ret
= vc4_wait_for_seqno(dev
, seqno
, *timeout_ns
, true);
1007 if ((ret
== -EINTR
|| ret
== -ERESTARTSYS
) && *timeout_ns
!= ~0ull) {
1008 uint64_t delta
= jiffies_to_nsecs(jiffies
- start
);
1010 if (*timeout_ns
>= delta
)
1011 *timeout_ns
-= delta
;
1018 vc4_wait_seqno_ioctl(struct drm_device
*dev
, void *data
,
1019 struct drm_file
*file_priv
)
1021 struct drm_vc4_wait_seqno
*args
= data
;
1023 return vc4_wait_for_seqno_ioctl_helper(dev
, args
->seqno
,
1028 vc4_wait_bo_ioctl(struct drm_device
*dev
, void *data
,
1029 struct drm_file
*file_priv
)
1032 struct drm_vc4_wait_bo
*args
= data
;
1033 struct drm_gem_object
*gem_obj
;
1039 gem_obj
= drm_gem_object_lookup(file_priv
, args
->handle
);
1041 DRM_DEBUG("Failed to look up GEM BO %d\n", args
->handle
);
1044 bo
= to_vc4_bo(gem_obj
);
1046 ret
= vc4_wait_for_seqno_ioctl_helper(dev
, bo
->seqno
,
1049 drm_gem_object_put_unlocked(gem_obj
);
1054 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
1056 * @data: ioctl argument
1057 * @file_priv: DRM file for this fd
1059 * This is the main entrypoint for userspace to submit a 3D frame to
1060 * the GPU. Userspace provides the binner command list (if
1061 * applicable), and the kernel sets up the render command list to draw
1062 * to the framebuffer described in the ioctl, using the command lists
1063 * that the 3D engine's binner will produce.
1066 vc4_submit_cl_ioctl(struct drm_device
*dev
, void *data
,
1067 struct drm_file
*file_priv
)
1069 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
1070 struct drm_vc4_submit_cl
*args
= data
;
1071 struct vc4_exec_info
*exec
;
1072 struct ww_acquire_ctx acquire_ctx
;
1075 if ((args
->flags
& ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR
|
1076 VC4_SUBMIT_CL_FIXED_RCL_ORDER
|
1077 VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X
|
1078 VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y
)) != 0) {
1079 DRM_DEBUG("Unknown flags: 0x%02x\n", args
->flags
);
1083 exec
= kcalloc(1, sizeof(*exec
), GFP_KERNEL
);
1085 DRM_ERROR("malloc failure on exec struct\n");
1089 mutex_lock(&vc4
->power_lock
);
1090 if (vc4
->power_refcount
++ == 0) {
1091 ret
= pm_runtime_get_sync(&vc4
->v3d
->pdev
->dev
);
1093 mutex_unlock(&vc4
->power_lock
);
1094 vc4
->power_refcount
--;
1099 mutex_unlock(&vc4
->power_lock
);
1102 INIT_LIST_HEAD(&exec
->unref_list
);
1104 ret
= vc4_cl_lookup_bos(dev
, file_priv
, exec
);
1108 if (exec
->args
->bin_cl_size
!= 0) {
1109 ret
= vc4_get_bcl(dev
, exec
);
1117 ret
= vc4_get_rcl(dev
, exec
);
1121 ret
= vc4_lock_bo_reservations(dev
, exec
, &acquire_ctx
);
1125 /* Clear this out of the struct we'll be putting in the queue,
1126 * since it's part of our stack.
1130 ret
= vc4_queue_submit(dev
, exec
, &acquire_ctx
);
1134 /* Return the seqno for our job. */
1135 args
->seqno
= vc4
->emit_seqno
;
1140 vc4_complete_exec(vc4
->dev
, exec
);
1146 vc4_gem_init(struct drm_device
*dev
)
1148 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
1150 vc4
->dma_fence_context
= dma_fence_context_alloc(1);
1152 INIT_LIST_HEAD(&vc4
->bin_job_list
);
1153 INIT_LIST_HEAD(&vc4
->render_job_list
);
1154 INIT_LIST_HEAD(&vc4
->job_done_list
);
1155 INIT_LIST_HEAD(&vc4
->seqno_cb_list
);
1156 spin_lock_init(&vc4
->job_lock
);
1158 INIT_WORK(&vc4
->hangcheck
.reset_work
, vc4_reset_work
);
1159 timer_setup(&vc4
->hangcheck
.timer
, vc4_hangcheck_elapsed
, 0);
1161 INIT_WORK(&vc4
->job_done_work
, vc4_job_done_work
);
1163 mutex_init(&vc4
->power_lock
);
1165 INIT_LIST_HEAD(&vc4
->purgeable
.list
);
1166 mutex_init(&vc4
->purgeable
.lock
);
1170 vc4_gem_destroy(struct drm_device
*dev
)
1172 struct vc4_dev
*vc4
= to_vc4_dev(dev
);
1174 /* Waiting for exec to finish would need to be done before
1175 * unregistering V3D.
1177 WARN_ON(vc4
->emit_seqno
!= vc4
->finished_seqno
);
1179 /* V3D should already have disabled its interrupt and cleared
1180 * the overflow allocation registers. Now free the object.
1183 drm_gem_object_put_unlocked(&vc4
->bin_bo
->base
.base
);
1187 if (vc4
->hang_state
)
1188 vc4_free_hang_state(dev
, vc4
->hang_state
);
1191 int vc4_gem_madvise_ioctl(struct drm_device
*dev
, void *data
,
1192 struct drm_file
*file_priv
)
1194 struct drm_vc4_gem_madvise
*args
= data
;
1195 struct drm_gem_object
*gem_obj
;
1199 switch (args
->madv
) {
1200 case VC4_MADV_DONTNEED
:
1201 case VC4_MADV_WILLNEED
:
1210 gem_obj
= drm_gem_object_lookup(file_priv
, args
->handle
);
1212 DRM_DEBUG("Failed to look up GEM BO %d\n", args
->handle
);
1216 bo
= to_vc4_bo(gem_obj
);
1218 /* Only BOs exposed to userspace can be purged. */
1219 if (bo
->madv
== __VC4_MADV_NOTSUPP
) {
1220 DRM_DEBUG("madvise not supported on this BO\n");
1225 /* Not sure it's safe to purge imported BOs. Let's just assume it's
1226 * not until proven otherwise.
1228 if (gem_obj
->import_attach
) {
1229 DRM_DEBUG("madvise not supported on imported BOs\n");
1234 mutex_lock(&bo
->madv_lock
);
1236 if (args
->madv
== VC4_MADV_DONTNEED
&& bo
->madv
== VC4_MADV_WILLNEED
&&
1237 !refcount_read(&bo
->usecnt
)) {
1238 /* If the BO is about to be marked as purgeable, is not used
1239 * and is not already purgeable or purged, add it to the
1242 vc4_bo_add_to_purgeable_pool(bo
);
1243 } else if (args
->madv
== VC4_MADV_WILLNEED
&&
1244 bo
->madv
== VC4_MADV_DONTNEED
&&
1245 !refcount_read(&bo
->usecnt
)) {
1246 /* The BO has not been purged yet, just remove it from
1247 * the purgeable list.
1249 vc4_bo_remove_from_purgeable_pool(bo
);
1252 /* Save the purged state. */
1253 args
->retained
= bo
->madv
!= __VC4_MADV_PURGED
;
1255 /* Update internal madv state only if the bo was not purged. */
1256 if (bo
->madv
!= __VC4_MADV_PURGED
)
1257 bo
->madv
= args
->madv
;
1259 mutex_unlock(&bo
->madv_lock
);
1264 drm_gem_object_put_unlocked(gem_obj
);