/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"

#define to_amdgpu_ctx_entity(e) \
        container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
        [AMDGPU_HW_IP_GFX]      = 1,
        [AMDGPU_HW_IP_COMPUTE]  = 4,
        [AMDGPU_HW_IP_DMA]      = 2,
        [AMDGPU_HW_IP_UVD]      = 1,
        [AMDGPU_HW_IP_VCE]      = 1,
        [AMDGPU_HW_IP_UVD_ENC]  = 1,
        [AMDGPU_HW_IP_VCN_DEC]  = 1,
        [AMDGPU_HW_IP_VCN_ENC]  = 1,
        [AMDGPU_HW_IP_VCN_JPEG] = 1,
};
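
/* Total number of scheduler entities a context creates, summed over all HW IP types. */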
static int amdgput_ctx_total_num_entities(void)
{
        unsigned i, num_entities = 0;

        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
                num_entities += amdgpu_ctx_num_entities[i];

        return num_entities;
}
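
/*
 * Contexts at NORMAL priority or below can be created by anyone; higher
 * priorities additionally require CAP_SYS_NICE or DRM master privileges.
 */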
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                      enum drm_sched_priority priority)
{
        /* NORMAL and below are accessible by everyone */
        if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                return 0;

        if (capable(CAP_SYS_NICE))
                return 0;

        if (drm_is_current_master(filp))
                return 0;

        return -EACCES;
}
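
/*
 * Set up a new context: allocate the per-entity fence ring buffers, record
 * the current reset/VRAM-lost counters and create one scheduler entity per
 * ring exposed by each hardware IP block.
 */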
static int amdgpu_ctx_init(struct amdgpu_device *adev,
                           enum drm_sched_priority priority,
                           struct drm_file *filp,
                           struct amdgpu_ctx *ctx)
{
        unsigned num_entities = amdgput_ctx_total_num_entities();
        unsigned i, j, k;
        int r;

        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
                return -EINVAL;

        r = amdgpu_ctx_priority_permit(filp, priority);
        if (r)
                return r;

        memset(ctx, 0, sizeof(*ctx));
        ctx->adev = adev;

        ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
                              sizeof(struct dma_fence *), GFP_KERNEL);
        if (!ctx->fences)
                return -ENOMEM;

        ctx->entities[0] = kcalloc(num_entities,
                                   sizeof(struct amdgpu_ctx_entity),
                                   GFP_KERNEL);
        if (!ctx->entities[0]) {
                r = -ENOMEM;
                goto error_free_fences;
        }

        for (i = 0; i < num_entities; ++i) {
                struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

                entity->sequence = 1;
                entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
        }
        for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
                ctx->entities[i] = ctx->entities[i - 1] +
                                   amdgpu_ctx_num_entities[i - 1];

        kref_init(&ctx->refcount);
        spin_lock_init(&ctx->ring_lock);
        mutex_init(&ctx->lock);

        ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
        ctx->reset_counter_query = ctx->reset_counter;
        ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        ctx->init_priority = priority;
        ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
                struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
                unsigned num_rings = 0;
                unsigned num_rqs = 0;

                switch (i) {
                case AMDGPU_HW_IP_GFX:
                        rings[0] = &adev->gfx.gfx_ring[0];
                        num_rings = 1;
                        break;
                case AMDGPU_HW_IP_COMPUTE:
                        for (j = 0; j < adev->gfx.num_compute_rings; ++j)
                                rings[j] = &adev->gfx.compute_ring[j];
                        num_rings = adev->gfx.num_compute_rings;
                        break;
                case AMDGPU_HW_IP_DMA:
                        for (j = 0; j < adev->sdma.num_instances; ++j)
                                rings[j] = &adev->sdma.instance[j].ring;
                        num_rings = adev->sdma.num_instances;
                        break;
                case AMDGPU_HW_IP_UVD:
                        rings[0] = &adev->uvd.inst[0].ring;
                        num_rings = 1;
                        break;
                case AMDGPU_HW_IP_VCE:
                        rings[0] = &adev->vce.ring[0];
                        num_rings = 1;
                        break;
                case AMDGPU_HW_IP_UVD_ENC:
                        rings[0] = &adev->uvd.inst[0].ring_enc[0];
                        num_rings = 1;
                        break;
                case AMDGPU_HW_IP_VCN_DEC:
                        for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
                                if (adev->vcn.harvest_config & (1 << j))
                                        continue;
                                rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
                        }
                        break;
                case AMDGPU_HW_IP_VCN_ENC:
                        for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
                                if (adev->vcn.harvest_config & (1 << j))
                                        continue;
                                for (k = 0; k < adev->vcn.num_enc_rings; ++k)
                                        rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
                        }
                        break;
                case AMDGPU_HW_IP_VCN_JPEG:
                        for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
                                if (adev->vcn.harvest_config & (1 << j))
                                        continue;
                                rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
                        }
                        break;
                }

                for (j = 0; j < num_rings; ++j) {
                        if (!rings[j]->adev)
                                continue;

                        rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
                }

                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
                        r = drm_sched_entity_init(&ctx->entities[i][j].entity,
                                                  rqs, num_rqs, &ctx->guilty);
                if (r)
                        goto error_cleanup_entities;
        }

        return 0;

error_cleanup_entities:
        for (i = 0; i < num_entities; ++i)
                drm_sched_entity_destroy(&ctx->entities[0][i].entity);
        kfree(ctx->entities[0]);

error_free_fences:
        kfree(ctx->fences);
        ctx->fences = NULL;
        return r;
}
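
/* Final kref release: drop all cached fences, free the fence and entity arrays and the context itself. */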
static void amdgpu_ctx_fini(struct kref *ref)
{
        struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
        unsigned num_entities = amdgput_ctx_total_num_entities();
        struct amdgpu_device *adev = ctx->adev;
        unsigned i, j;

        if (!adev)
                return;

        for (i = 0; i < num_entities; ++i)
                for (j = 0; j < amdgpu_sched_jobs; ++j)
                        dma_fence_put(ctx->entities[0][i].fences[j]);
        kfree(ctx->fences);
        kfree(ctx->entities[0]);

        mutex_destroy(&ctx->lock);

        kfree(ctx);
}
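
/* Translate a userspace (hw_ip, instance, ring) triple into the matching scheduler entity. */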
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
{
        if (hw_ip >= AMDGPU_HW_IP_NUM) {
                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
                return -EINVAL;
        }

        /* Right now all IPs have only one instance - multiple rings. */
        if (instance != 0) {
                DRM_DEBUG("invalid ip instance: %d\n", instance);
                return -EINVAL;
        }

        if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
                DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
                return -EINVAL;
        }

        *entity = &ctx->entities[hw_ip][ring].entity;
        return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv,
                            struct drm_file *filp,
                            enum drm_sched_priority priority,
                            uint32_t *id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;
        int r;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        mutex_lock(&mgr->lock);
        r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
        if (r < 0) {
                mutex_unlock(&mgr->lock);
                kfree(ctx);
                return r;
        }

        *id = (uint32_t)r;
        r = amdgpu_ctx_init(adev, priority, filp, ctx);
        if (r) {
                idr_remove(&mgr->ctx_handles, *id);
                *id = 0;
                kfree(ctx);
        }
        mutex_unlock(&mgr->lock);
        return r;
}
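
/* kref release callback: destroy all scheduler entities, then finish and free the context. */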
static void amdgpu_ctx_do_release(struct kref *ref)
{
        struct amdgpu_ctx *ctx;
        unsigned num_entities;
        u32 i;

        ctx = container_of(ref, struct amdgpu_ctx, refcount);

        num_entities = 0;
        for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
                num_entities += amdgpu_ctx_num_entities[i];

        for (i = 0; i < num_entities; i++)
                drm_sched_entity_destroy(&ctx->entities[0][i].entity);

        amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;

        mutex_lock(&mgr->lock);
        ctx = idr_remove(&mgr->ctx_handles, id);
        if (ctx)
                kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        mutex_unlock(&mgr->lock);
        return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv, uint32_t id,
                            union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
        unsigned reset_counter;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        /* TODO: these two are always zero */
        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        /* determine if a GPU reset has occurred since the last call */
        reset_counter = atomic_read(&adev->gpu_reset_counter);
        /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
        if (ctx->reset_counter_query == reset_counter)
                out->state.reset_status = AMDGPU_CTX_NO_RESET;
        else
                out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
        ctx->reset_counter_query = reset_counter;

        mutex_unlock(&mgr->lock);
        return 0;
}
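
/*
 * Extended state query: reports GPU resets, VRAM loss, guilty status and RAS
 * error-count changes to userspace as flag bits.
 */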
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
                             struct amdgpu_fpriv *fpriv, uint32_t id,
                             union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
        uint32_t ras_counter;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

        if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

        /* query the uncorrectable (UE) error count */
        ras_counter = amdgpu_ras_query_error_count(adev, false);
        /* the RAS counter is monotonically increasing */
        if (ras_counter != ctx->ras_counter_ue) {
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
                ctx->ras_counter_ue = ras_counter;
        }

        /* query the correctable (CE) error count */
        ras_counter = amdgpu_ras_query_error_count(adev, true);
        if (ras_counter != ctx->ras_counter_ce) {
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
                ctx->ras_counter_ce = ras_counter;
        }

        mutex_unlock(&mgr->lock);
        return 0;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *filp)
{
        int r;
        uint32_t id;
        enum drm_sched_priority priority;

        union drm_amdgpu_ctx *args = data;
        struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_fpriv *fpriv = filp->driver_priv;

        r = 0;
        id = args->in.ctx_id;
        priority = amdgpu_to_sched_priority(args->in.priority);

        /* For backwards compatibility reasons, we need to accept
         * ioctls with garbage in the priority field */
        if (priority == DRM_SCHED_PRIORITY_INVALID)
                priority = DRM_SCHED_PRIORITY_NORMAL;

        switch (args->in.op) {
        case AMDGPU_CTX_OP_ALLOC_CTX:
                r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
                args->out.alloc.ctx_id = id;
                break;
        case AMDGPU_CTX_OP_FREE_CTX:
                r = amdgpu_ctx_free(fpriv, id);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE:
                r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE2:
                r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
                break;
        default:
                return -EINVAL;
        }

        return r;
}
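
/* Look up a context by handle and take a reference; drop it again with amdgpu_ctx_put(). */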
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;

        if (!fpriv)
                return NULL;

        mgr = &fpriv->ctx_mgr;

        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (ctx)
                kref_get(&ctx->refcount);
        mutex_unlock(&mgr->lock);
        return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
        if (ctx == NULL)
                return -EINVAL;

        kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        return 0;
}
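
/*
 * Store the fence of a new submission in the per-entity ring buffer of
 * fences. The slot is sequence & (amdgpu_sched_jobs - 1), which relies on
 * amdgpu_sched_jobs being a power of two.
 */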
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
                          struct drm_sched_entity *entity,
                          struct dma_fence *fence, uint64_t *handle)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        uint64_t seq = centity->sequence;
        struct dma_fence *other = NULL;
        unsigned idx = 0;

        idx = seq & (amdgpu_sched_jobs - 1);
        other = centity->fences[idx];
        if (other)
                BUG_ON(!dma_fence_is_signaled(other));

        dma_fence_get(fence);

        spin_lock(&ctx->ring_lock);
        centity->fences[idx] = fence;
        centity->sequence++;
        spin_unlock(&ctx->ring_lock);

        dma_fence_put(other);
        if (handle)
                *handle = seq;
}
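
/*
 * Return the fence for a given sequence number of this entity: ~0ull means
 * the most recently submitted job, sequences that have already fallen out of
 * the ring buffer yield NULL, and future sequences yield -EINVAL.
 */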
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                                       struct drm_sched_entity *entity,
                                       uint64_t seq)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        struct dma_fence *fence;

        spin_lock(&ctx->ring_lock);

        if (seq == ~0ull)
                seq = centity->sequence - 1;

        if (seq >= centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return ERR_PTR(-EINVAL);
        }

        if (seq + amdgpu_sched_jobs < centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return NULL;
        }

        fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
        spin_unlock(&ctx->ring_lock);

        return fence;
}
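
/* Apply a priority override to every scheduler entity of the context. */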
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                  enum drm_sched_priority priority)
{
        unsigned num_entities = amdgput_ctx_total_num_entities();
        enum drm_sched_priority ctx_prio;
        unsigned i;

        ctx->override_priority = priority;

        ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;

        for (i = 0; i < num_entities; i++) {
                struct drm_sched_entity *entity = &ctx->entities[0][i].entity;

                drm_sched_entity_set_priority(entity, ctx_prio);
        }
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
                               struct drm_sched_entity *entity)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
        struct dma_fence *other = centity->fences[idx];

        if (other) {
                signed long r;

                r = dma_fence_wait(other, true);
                if (r < 0) {
                        if (r != -ERESTARTSYS)
                                DRM_ERROR("Error (%ld) waiting for fence!\n", r);

                        return r;
                }
        }

        return 0;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
        mutex_init(&mgr->lock);
        idr_init(&mgr->ctx_handles);
}
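
/*
 * Flush all scheduler entities of every context owned by this manager and
 * return the remaining timeout budget.
 */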
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
        unsigned num_entities = amdgput_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i;

        idp = &mgr->ctx_handles;

        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
                for (i = 0; i < num_entities; i++) {
                        struct drm_sched_entity *entity;

                        entity = &ctx->entities[0][i].entity;
                        timeout = drm_sched_entity_flush(entity, timeout);
                }
        }
        mutex_unlock(&mgr->lock);
        return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
        unsigned num_entities = amdgput_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i;

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_read(&ctx->refcount) != 1) {
                        DRM_ERROR("ctx %p is still alive\n", ctx);
                        continue;
                }

                for (i = 0; i < num_entities; i++)
                        drm_sched_entity_fini(&ctx->entities[0][i].entity);
        }
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id;

        amdgpu_ctx_mgr_entity_fini(mgr);

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
                        DRM_ERROR("ctx %p is still alive\n", ctx);
        }

        idr_destroy(&mgr->ctx_handles);
        mutex_destroy(&mgr->lock);
}