According to Marek, a pipeline sync should be inserted for implicit syncs as well.
v2: bump the driver version
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update, false);
+ return amdgpu_sync_fence(sync, vm->last_update);
}
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
return 0;
}
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
- ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
if (r)
return r;
return r;
}
- r = amdgpu_sync_fence(&p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
return r;
* - 3.36.0 - Allow reading more status registers on si/cik
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
+ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 38
+#define KMS_DRIVER_MINOR 39
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
- ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+ ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(sync, ring->vmid_wait, false);
+ return amdgpu_sync_fence(sync, ring->vmid_wait);
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
return -ENOMEM;
}
- r = amdgpu_sync_fence(sync, &array->base, false);
+ r = amdgpu_sync_fence(sync, &array->base);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = &array->base;
return r;
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(sync, tmp, false);
+ r = amdgpu_sync_fence(sync, tmp);
return r;
}
needs_flush = true;
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(&id->active, fence, false);
+ r = amdgpu_sync_fence(&id->active, fence);
if (r)
goto error;
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
- bool explicit = false;
int r;
- fence = amdgpu_sync_get_fence(&job->sync, &explicit);
- if (fence && explicit) {
- if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(&job->sched_sync, fence, false);
- if (r)
- DRM_ERROR("Error adding fence (%d)\n", r);
- }
+ fence = amdgpu_sync_get_fence(&job->sync);
+ if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
+ r = amdgpu_sync_fence(&job->sched_sync, fence);
+ if (r)
+ DRM_ERROR("Error adding fence (%d)\n", r);
}
while (fence == NULL && vm && !job->vmid) {
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
- fence = amdgpu_sync_get_fence(&job->sync, NULL);
+ fence = amdgpu_sync_get_fence(&job->sync);
}
return fence;
struct amdgpu_sync_entry {
struct hlist_node node;
struct dma_fence *fence;
- bool explicit;
};
static struct kmem_cache *amdgpu_sync_slab;
* Tries to add the fence to an existing hash entry. Returns true when an entry
* was found, false otherwise.
*/
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
continue;
amdgpu_sync_keep_later(&e->fence, f);
-
- /* Preserve eplicit flag to not loose pipe line sync */
- e->explicit |= explicit;
-
return true;
}
return false;
*
* @sync: sync object to add fence to
* @f: fence to sync to
- * @explicit: if this is an explicit dependency
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
if (!f)
return 0;
- if (amdgpu_sync_add_later(sync, f, explicit))
+ if (amdgpu_sync_add_later(sync, f))
return 0;
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
if (!e)
return -ENOMEM;
- e->explicit = explicit;
-
hash_add(sync->fences, &e->node, f->context);
e->fence = dma_fence_get(f);
return 0;
return 0;
amdgpu_sync_keep_later(&sync->last_vm_update, fence);
- return amdgpu_sync_fence(sync, fence, false);
+ return amdgpu_sync_fence(sync, fence);
}
/**
/* always sync to the exclusive fence */
f = dma_resv_get_excl(resv);
- r = amdgpu_sync_fence(sync, f, false);
+ r = amdgpu_sync_fence(sync, f);
flist = dma_resv_get_list(resv);
if (!flist || r)
/* Always sync to moves, no matter what */
if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
- r = amdgpu_sync_fence(sync, f, false);
+ r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
"Adding eviction fence to sync obj");
- r = amdgpu_sync_fence(sync, f, false);
+ r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
* amdgpu_sync_get_fence - get the next fence from the sync object
*
* @sync: sync object to use
- * @explicit: true if the next fence is explicit
*
* Get and removes the next fence from the sync object not signaled yet.
*/
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
- if (explicit)
- *explicit = e->explicit;
hash_del(&e->node);
kmem_cache_free(amdgpu_sync_slab, e);
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f, e->explicit);
+ r = amdgpu_sync_fence(clone, f);
if (r)
return r;
} else {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
- bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int r;
/* Wait for PD/PT moves to be completed */
- r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false);
+ r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
if (r)
return r;