]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
drm/amdkfd: Fix mm reference in SVM eviction worker
authorFelix Kuehling <Felix.Kuehling@amd.com>
Mon, 8 Aug 2022 21:00:38 +0000 (17:00 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 16 Aug 2022 22:03:23 +0000 (18:03 -0400)
Use the mm reference from the fence. This allows removing the
svm_bo->svms pointer, which was problematic because we cannot assume
that the struct kfd_process containing the svms is still allocated
without holding a refcount on the process.

Use mmget_not_zero to ensure the mm is still valid, and drop the svm_bo
reference if it isn't.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h

index a67ba8879a56730226cc0ebbdd21a35cce8d68ba..11074cc8c333b274484929dddb0752725e4af24b 100644 (file)
@@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
                kfree(svm_bo);
                return -ESRCH;
        }
-       svm_bo->svms = prange->svms;
        svm_bo->eviction_fence =
                amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
                                           mm,
@@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
        struct svm_range_bo *svm_bo;
-       struct kfd_process *p;
        struct mm_struct *mm;
        int r = 0;
 
@@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
        if (!svm_bo_ref_unless_zero(svm_bo))
                return; /* svm_bo was freed while eviction was pending */
 
-       /* svm_range_bo_release destroys this worker thread. So during
-        * the lifetime of this thread, kfd_process and mm will be valid.
-        */
-       p = container_of(svm_bo->svms, struct kfd_process, svms);
-       mm = p->mm;
-       if (!mm)
+       if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+               mm = svm_bo->eviction_fence->mm;
+       } else {
+               svm_range_bo_unref(svm_bo);
                return;
+       }
 
        mmap_read_lock(mm);
        spin_lock(&svm_bo->list_lock);
@@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
                mutex_lock(&prange->migrate_mutex);
                do {
-                       r = svm_migrate_vram_to_ram(prange,
-                                               svm_bo->eviction_fence->mm,
+                       r = svm_migrate_vram_to_ram(prange, mm,
                                                KFD_MIGRATE_TRIGGER_TTM_EVICTION);
                } while (!r && prange->actual_loc && --retries);
 
@@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
        }
        spin_unlock(&svm_bo->list_lock);
        mmap_read_unlock(mm);
+       mmput(mm);
 
        dma_fence_signal(&svm_bo->eviction_fence->base);
 
index 9156b041ef17519db0094cbc05d8868fda0885e1..cfac13ad06ef0f70e2983bceeb85b59c0427e2fb 100644 (file)
@@ -46,7 +46,6 @@ struct svm_range_bo {
        spinlock_t                      list_lock;
        struct amdgpu_amdkfd_fence      *eviction_fence;
        struct work_struct              eviction_work;
-       struct svm_range_list           *svms;
        uint32_t                        evicting;
        struct work_struct              release_work;
 };