*/
#include <linux/types.h>
+#include <linux/sched/task.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
+ atomic_set(&prange->invalid, 0);
mutex_init(&prange->lock);
svm_range_set_default_attributes(&prange->preferred_loc,
&prange->prefetch_loc,
goto retry_flush_work;
}
+static void svm_range_restore_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+ int evicted_ranges;
+ int invalid;
+ int r;
+
+ svms = container_of(dwork, struct svm_range_list, restore_work);
+ evicted_ranges = atomic_read(&svms->evicted_ranges);
+ if (!evicted_ranges)
+ return;
+
+ pr_debug("restore svm ranges\n");
+
+	/* kfd_process_notifier_release destroys this worker, so kfd_process
+	 * and mm remain valid while this work item is running.
+	 */
+ p = container_of(svms, struct kfd_process, svms);
+ process_info = p->kgd_process_info;
+ mm = p->mm;
+ if (!mm)
+ return;
+
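+	/* Lock order: process_info lock, then mmap write lock (taken inside
+	 * svm_range_list_lock_and_flush_work), then svms lock.
+	 */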
+ mutex_lock(&process_info->lock);
+ svm_range_list_lock_and_flush_work(svms, mm);
+ mutex_lock(&svms->lock);
+
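+	/* Re-read the eviction count now that the locks are held; it is
+	 * compared below to detect evictions racing with this restore.
+	 */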
+ evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+ list_for_each_entry(prange, &svms->list, list) {
+ invalid = atomic_read(&prange->invalid);
+ if (!invalid)
+ continue;
+
+ pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+ prange->svms, prange, prange->start, prange->last,
+ invalid);
+
+ r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+ false, true);
+ if (r) {
+ pr_debug("failed %d to map 0x%lx to gpus\n", r,
+ prange->start);
+ goto unlock_out;
+ }
+
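+		/* If the range was invalidated again while it was being
+		 * mapped, leave the flag set and bail out; the work is
+		 * rescheduled below.
+		 */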
+ if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+ goto unlock_out;
+ }
+
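+	/* Clear the process-wide eviction count only if no new eviction
+	 * happened while restoring; otherwise bail out and reschedule.
+	 */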
+ if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+ evicted_ranges)
+ goto unlock_out;
+
+ evicted_ranges = 0;
+
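+	/* All evicted ranges are mapped again, restart the process queues */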
+ r = kgd2kfd_resume_mm(mm);
+ if (r) {
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ pr_debug("failed %d to resume KFD\n", r);
+ }
+
+	pr_debug("restored svm ranges successfully\n");
+
+unlock_out:
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_ranges) {
+ pr_debug("reschedule to restore svm range\n");
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ }
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Stop all queues of the process to ensure GPU doesn't access the memory, then
+ * return to let the CPU evict the buffer and proceed with the CPU page table
+ * update.
+ *
+ * No lock is needed to synchronize CPU page table invalidation with GPU
+ * execution. If an invalidation happens while the restore work is running, the
+ * restore work restarts to make sure the latest CPU pages are mapped to the
+ * GPU before the queues are started again.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start, unsigned long last)
+{
+ struct svm_range_list *svms = prange->svms;
+ int evicted_ranges;
+ int r = 0;
+
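+	/* Mark the range invalid. Only the first eviction for the process
+	 * stops the queues and schedules the restore work.
+	 */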
+ atomic_inc(&prange->invalid);
+ evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+ if (evicted_ranges != 1)
+ return r;
+
+ pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+
+ /* First eviction, stop the queues */
+ r = kgd2kfd_quiesce_mm(mm);
+ if (r)
+ pr_debug("failed to quiesce KFD\n");
+
+ pr_debug("schedule to restore svm %p ranges\n", svms);
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+ return r;
+}
+
static struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
* svm_range_cpu_invalidate_pagetables - interval notifier callback
*
* MMU range unmap notifier to remove svm ranges
+ *
+ * If GPU vm fault retry is not enabled, evict the svm range; the restore
+ * work will then update the GPU mapping.
+ * If GPU vm fault retry is enabled, unmap the svm range from the GPU; the
+ * vm fault handler will update the GPU mapping.
*/
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
+ svm_range_evict(prange, mni->mm, start, last);
break;
}
svms->objects = RB_ROOT_CACHED;
mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list);
+ atomic_set(&svms->evicted_ranges, 0);
+ INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
spin_lock_init(&svms->deferred_list_lock);