drm/amdkfd: copy memory through gart table
author     Philip Yang <Philip.Yang@amd.com>
           Fri, 7 Feb 2020 22:08:04 +0000 (17:08 -0500)
committer  Alex Deucher <alexander.deucher@amd.com>
           Wed, 21 Apr 2021 01:48:23 +0000 (21:48 -0400)
Use sdma linear copy to migrate data between ram and vram. The sdma
linear copy command uses the kernel buffer function queue to access system
memory through the gart table.

Use reserved gart table window 0 to map system page addresses; vram page
addresses use direct mapping. The same kernel buffer function is used to
fill in the gart table mapping, so the mapping is serialized with the
memory copy by the sdma job submission. For larger buffer migrations we
only need to wait for the last memory copy sdma fence.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.h

index d8cec5ebe1d47deac3d2fbe779986cb67f1aa1dc..74b38856cce339b4bf1414c401b7afe65e044226 100644 (file)
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+       return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+                    dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_job *job;
+       unsigned int num_dw, num_bytes;
+       struct dma_fence *fence;
+       uint64_t src_addr, dst_addr;
+       uint64_t pte_flags;
+       void *cpu_addr;
+       int r;
+
+       /* use gart window 0 */
+       *gart_addr = adev->gmc.gart_start;
+
+       num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+       num_bytes = npages * 8;
+
+       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+                                    AMDGPU_IB_POOL_DELAYED, &job);
+       if (r)
+               return r;
+
+       src_addr = num_dw * 4;
+       src_addr += job->ibs[0].gpu_addr;
+
+       dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+       amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+                               dst_addr, num_bytes, false);
+
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       WARN_ON(job->ibs[0].length_dw > num_dw);
+
+       pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+       pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+       if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+               pte_flags |= AMDGPU_PTE_WRITEABLE;
+       pte_flags |= adev->gart.gart_pte_flags;
+
+       cpu_addr = &job->ibs[0].ptr[num_dw];
+
+       r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+       if (r)
+               goto error_free;
+
+       r = amdgpu_job_submit(job, &adev->mman.entity,
+                             AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+       if (r)
+               goto error_free;
+
+       dma_fence_put(fence);
+
+       return r;
+
+error_free:
+       amdgpu_job_free(job);
+       return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring runs on
+ * @sys: system memory dma address array (source or destination)
+ * @vram: vram page address array (source or destination)
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after the sdma copy is done
+ *
+ * The ram side uses contiguous GART table entries mapped to the ram pages,
+ * the vram side uses direct mapping of the vram pages, which must be npages
+ * contiguous pages.
+ * The GART update and the sdma copy share the same buffer function ring, and
+ * the copy is split into multiple GTT_MAX_PAGES transfers, so all sdma
+ * operations are serialized; waiting for the returned fence of the last sdma
+ * operation is enough to know the whole copy is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+                            uint64_t *vram, uint64_t npages,
+                            enum MIGRATION_COPY_DIR direction,
+                            struct dma_fence **mfence)
+{
+       const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       uint64_t gart_s, gart_d;
+       struct dma_fence *next;
+       uint64_t size;
+       int r;
+
+       mutex_lock(&adev->mman.gtt_window_lock);
+
+       while (npages) {
+               size = min(GTT_MAX_PAGES, npages);
+
+               if (direction == FROM_VRAM_TO_RAM) {
+                       gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+                       r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+               } else if (direction == FROM_RAM_TO_VRAM) {
+                       r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+                                                KFD_IOCTL_SVM_FLAG_GPU_RO);
+                       gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+               }
+               if (r) {
+                       pr_debug("failed %d to create gart mapping\n", r);
+                       goto out_unlock;
+               }
+
+               r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+                                      NULL, &next, false, true, false);
+               if (r) {
+                       pr_debug("failed %d to copy memory\n", r);
+                       goto out_unlock;
+               }
+
+               dma_fence_put(*mfence);
+               *mfence = next;
+               npages -= size;
+               if (npages) {
+                       sys += size;
+                       vram += size;
+               }
+       }
+
+out_unlock:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for the sdma memory copy to finish
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for the dma fence to be signaled. If the copy was split into multiple
+ * sdma operations, this is the fence of the last sdma operation.
+ *
+ * Context: called after svm_migrate_copy_memory
+ *
+ * Return:
+ * 0           - success
+ * otherwise   - error code from dma fence signal
+ */
+int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+       int r = 0;
+
+       if (mfence) {
+               r = dma_fence_wait(mfence, false);
+               dma_fence_put(mfence);
+               pr_debug("sdma copy memory fence done\n");
+       }
+
+       return r;
+}
+
 static void svm_migrate_page_free(struct page *page)
 {
 }
index 89392548ec442fe3aae5e82f20e12ebd64c93f1e..df84e4143e25685babcf0b8667f2af20e1a274e0 100644 (file)
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+enum MIGRATION_COPY_DIR {
+       FROM_RAM_TO_VRAM = 0,
+       FROM_VRAM_TO_RAM
+};
+
 #if defined(CONFIG_DEVICE_PRIVATE)
 int svm_migrate_init(struct amdgpu_device *adev);
 void svm_migrate_fini(struct amdgpu_device *adev);
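
For illustration only (not part of this commit), here is a minimal sketch of
how the two helpers above are meant to be used together from inside
kfd_migrate.c, assuming the caller has already dma-mapped the system pages
into sys[] and prepared npages of contiguous vram page addresses in vram[];
the wrapper name and the array setup are hypothetical:

/* Hypothetical example, not from the patch: copy npages of system memory to
 * vram and wait for the sdma copy to complete.
 */
static int svm_migrate_example_ram_to_vram(struct amdgpu_device *adev,
                                           dma_addr_t *sys, uint64_t *vram,
                                           uint64_t npages)
{
        struct dma_fence *mfence = NULL;
        int r, r2;

        /* Queues one gart window 0 mapping plus one sdma copy per
         * GTT_MAX_PAGES chunk; only the fence of the last chunk is kept
         * in mfence.
         */
        r = svm_migrate_copy_memory_gart(adev, sys, vram, npages,
                                         FROM_RAM_TO_VRAM, &mfence);

        /* svm_migrate_copy_done() tolerates a NULL fence, so wait even if a
         * later chunk failed: any sdma work already submitted must finish
         * before the pages can be reused. Waiting on the last fence alone is
         * enough because all chunks are serialized on the same ring.
         */
        r2 = svm_migrate_copy_done(adev, mfence);

        return r ? r : r2;
}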