/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */
#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>
#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
		return false;
	}
}
static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		return DRM_SCHED_PRIORITY_UNSET;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized userspace provided priority
	 * already, WARN if this happens.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}
}
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	if (!amdgpu_ctx_priority_is_valid(priority))
		return -EINVAL;

	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}
static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}
static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}
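
/*
 * Map a context to a hardware ring priority level for the given IP block.
 * The effective priority is the override priority when one has been set,
 * otherwise the priority the context was created with.  If no scheduler is
 * configured at the selected level, fall back to AMDGPU_RING_PRIO_DEFAULT.
 */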
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->adev;
	int32_t ctx_prio;
	unsigned int hw_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}
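
/*
 * Context entities are created lazily, the first time work is submitted for
 * a given (hw_ip, ring) pair (see amdgpu_ctx_get_entity()).  The entity is
 * bound to the schedulers configured for the selected hardware priority.
 * For engines that retain state across dependent jobs (UVD/VCN), a single
 * scheduler is picked up front, which effectively disables load balancing.
 */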
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct amdgpu_device *adev = ctx->adev;
	struct amdgpu_ctx_entity *entity;
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	unsigned num_scheds = 0;
	int32_t ctx_prio;
	unsigned int hw_prio;
	enum drm_sched_priority drm_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}
static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   int32_t priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	ctx->adev = adev;

	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
	ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;

	return 0;
}
static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
	int i;

	if (!entity)
		return;

	for (i = 0; i < amdgpu_sched_jobs; ++i)
		dma_fence_put(entity->fences[i]);

	kfree(entity);
}
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
	return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
			ctx->entities[i][j] = NULL;
		}
	}

	if (drm_dev_enter(&adev->ddev, &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
		drm_dev_exit(idx);
	}

	mutex_destroy(&ctx->lock);
	kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring]->entity;
	return 0;
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}
static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}
#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}
static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}
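
/*
 * Handler for the context ioctl: dispatches ALLOC_CTX, FREE_CTX, QUERY_STATE,
 * QUERY_STATE2 and GET/SET_STABLE_PSTATE based on args->in.op.  Invalid
 * priorities are mapped to NORMAL rather than rejected, for backwards
 * compatibility with older userspace.
 */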
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (fpriv == NULL)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}
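
/*
 * Fences for a context entity live in a small ring buffer of
 * amdgpu_sched_jobs slots.  The slot is derived from the per-entity sequence
 * number as "seq & (amdgpu_sched_jobs - 1)", which relies on
 * amdgpu_sched_jobs being a power of two.  amdgpu_ctx_add_fence() stores the
 * new fence, bumps the sequence and returns the old sequence number as the
 * handle that amdgpu_ctx_get_fence() accepts later.
 */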
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}
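
/*
 * Before a ring buffer slot is reused, wait for the fence that currently
 * occupies it.  This throttles each entity to at most amdgpu_sched_jobs
 * outstanding submissions.
 */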
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}
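
/*
 * Accumulate how long the fences still held in an entity's ring buffer have
 * been (or were) on the GPU: the scheduled timestamp marks the start and the
 * finished timestamp (or "now" for still running jobs) marks the end.
 * amdgpu_ctx_mgr_fence_usage() below sums this over all contexts of a client
 * for one hw IP and ring, presumably as input for per-client usage reporting.
 */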
static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
		struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
{
	ktime_t now, t1;
	uint32_t i;

	*total = *max = 0;

	now = ktime_get();
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		struct dma_fence *fence;
		struct drm_sched_fence *s_fence;

		spin_lock(&ctx->ring_lock);
		fence = dma_fence_get(centity->fences[i]);
		spin_unlock(&ctx->ring_lock);
		if (!fence)
			continue;
		s_fence = to_drm_sched_fence(fence);
		if (!dma_fence_is_signaled(&s_fence->scheduled)) {
			dma_fence_put(fence);
			continue;
		}
		t1 = s_fence->scheduled.timestamp;
		if (!ktime_before(t1, now)) {
			dma_fence_put(fence);
			continue;
		}
		if (dma_fence_is_signaled(&s_fence->finished) &&
			s_fence->finished.timestamp < now)
			*total += ktime_sub(s_fence->finished.timestamp, t1);
		else
			*total += ktime_sub(now, t1);
		t1 = ktime_sub(now, t1);
		dma_fence_put(fence);
		*max = max(t1, *max);
	}
}
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed)
{
	struct idr *idp;
	struct amdgpu_ctx *ctx;
	uint32_t id;
	struct amdgpu_ctx_entity *centity;
	ktime_t total = 0, max = 0;

	if (idx >= AMDGPU_MAX_ENTITY_NUM)
		return 0;

	idp = &mgr->ctx_handles;
	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		ktime_t ttotal, tmax;

		if (!ctx->entities[hwip][idx])
			continue;

		centity = ctx->entities[hwip][idx];
		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);

		/* Harmonic mean approximation diverges for very small
		 * values. If ratio < 0.01% ignore
		 */
		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
			continue;

		total = ktime_add(total, ttotal);
		max = ktime_after(tmax, max) ? tmax : max;
	}

	mutex_unlock(&mgr->lock);
	if (elapsed)
		*elapsed = max;

	return total;
}
);