git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
f54d1867 28#include <linux/dma-fence-array.h>
a9f87f64 29#include <linux/interval_tree_generic.h>
02208441 30#include <linux/idr.h>
d38ceaf9
AD
31#include <drm/drmP.h>
32#include <drm/amdgpu_drm.h>
33#include "amdgpu.h"
34#include "amdgpu_trace.h"
35
36/*
37 * GPUVM
38 * GPUVM is similar to the legacy gart on older asics, however
39 * rather than there being a single global gart table
40 * for the entire GPU, there are multiple VM page tables active
41 * at any given time. The VM page tables can contain a mix of
42 * vram pages and system memory pages, and system memory pages
43 * can be mapped as snooped (cached system pages) or unsnooped
44 * (uncached system pages).
45 * Each VM has an ID associated with it and there is a page table
46 * associated with each VMID. When executing a command buffer,
47 * the kernel tells the ring what VMID to use for that command
48 * buffer. VMIDs are allocated dynamically as commands are submitted.
49 * The userspace drivers maintain their own address space and the kernel
50 * sets up their page tables accordingly when they submit their
51 * command buffers and a VMID is assigned.
52 * Cayman/Trinity support up to 8 active VMs at any given time;
53 * SI supports 16.
54 */
55
a9f87f64
CK
56#define START(node) ((node)->start)
57#define LAST(node) ((node)->last)
58
59INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
60 START, LAST, static, amdgpu_vm_it)
61
62#undef START
63#undef LAST
64
f4833c4f
HK
65/* Local structure. Encapsulate some VM table update parameters to reduce
66 * the number of function parameters
67 */
29efc4f5 68struct amdgpu_pte_update_params {
27c5f36f
CK
69 /* amdgpu device we do this update for */
70 struct amdgpu_device *adev;
49ac8a24
CK
71 /* optional amdgpu_vm we do this update for */
72 struct amdgpu_vm *vm;
f4833c4f
HK
73 /* address where to copy page table entries from */
74 uint64_t src;
f4833c4f
HK
75 /* indirect buffer to fill with commands */
76 struct amdgpu_ib *ib;
afef8b8f 77 /* Function which actually does the update */
373ac645
CK
78 void (*func)(struct amdgpu_pte_update_params *params,
79 struct amdgpu_bo *bo, uint64_t pe,
afef8b8f 80 uint64_t addr, unsigned count, uint32_t incr,
6b777607 81 uint64_t flags);
b4d42511
HK
82 /* The next two are used during VM update by CPU:
83 * pages_addr - DMA addresses to use for mapping
84 * kptr - kernel pointer of the PD/PT BO that needs to be updated
85 */
86 dma_addr_t *pages_addr;
87 void *kptr;
f4833c4f
HK
88};
89
284710fa
CK
90/* Helper to disable partial resident texture feature from a fence callback */
91struct amdgpu_prt_cb {
92 struct amdgpu_device *adev;
93 struct dma_fence_cb cb;
94};
95
50783147
CK
96/**
97 * amdgpu_vm_level_shift - return the addr shift for each level
98 *
99 * @adev: amdgpu_device pointer
100 *
101 * Returns the number of bits the pfn needs to be right shifted for a level.
102 */
103static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
104 unsigned level)
105{
196f7489
CZ
106 unsigned shift = 0xff;
107
108 switch (level) {
109 case AMDGPU_VM_PDB2:
110 case AMDGPU_VM_PDB1:
111 case AMDGPU_VM_PDB0:
112 shift = 9 * (AMDGPU_VM_PDB0 - level) +
50783147 113 adev->vm_manager.block_size;
196f7489
CZ
114 break;
115 case AMDGPU_VM_PTB:
116 shift = 0;
117 break;
118 default:
119 dev_err(adev->dev, "the level%d isn't supported.\n", level);
120 }
121
122 return shift;
50783147
CK
123}
124
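
As a stand-alone illustration of the shift arithmetic above (not part of the driver), the following user-space C sketch assumes a Vega-style layout with block_size = 9 and the levels PDB2..PTB numbered 0..3; with those assumptions each level consumes 9 bits of the page frame number.

#include <stdio.h>

/* hedged sketch: level numbering and block size are assumptions,
 * not taken from the driver headers */
enum { PDB2, PDB1, PDB0, PTB };
static const unsigned block_size = 9;	/* stands in for vm_manager.block_size */

static unsigned level_shift(int level)
{
	if (level <= PDB0)
		return 9 * (PDB0 - level) + block_size;	/* same formula as above */
	return 0;					/* PTB: index the PTEs directly */
}

int main(void)
{
	for (int level = PDB2; level <= PTB; ++level)
		printf("level %d: pfn shift %u, one entry covers %llu GPU pages\n",
		       level, level_shift(level), 1ULL << level_shift(level));
	return 0;
}
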
d38ceaf9 125/**
72a7ec5c 126 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
d38ceaf9
AD
127 *
128 * @adev: amdgpu_device pointer
129 *
72a7ec5c 130 * Calculate the number of entries in a page directory or page table.
d38ceaf9 131 */
72a7ec5c
CK
132static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
133 unsigned level)
d38ceaf9 134{
196f7489
CZ
135 unsigned shift = amdgpu_vm_level_shift(adev,
136 adev->vm_manager.root_level);
0410c5e5 137
196f7489 138 if (level == adev->vm_manager.root_level)
72a7ec5c 139 /* For the root directory */
0410c5e5 140 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
196f7489 141 else if (level != AMDGPU_VM_PTB)
0410c5e5
CK
142 /* Everything in between */
143 return 512;
144 else
72a7ec5c 145 /* For the page tables on the leaves */
36b32a68 146 return AMDGPU_VM_PTE_COUNT(adev);
d38ceaf9
AD
147}
148
149/**
72a7ec5c 150 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
d38ceaf9
AD
151 *
152 * @adev: amdgpu_device pointer
153 *
72a7ec5c 154 * Calculate the size of the BO for a page directory or page table in bytes.
d38ceaf9 155 */
72a7ec5c 156static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
d38ceaf9 157{
72a7ec5c 158 return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
d38ceaf9
AD
159}
160
161/**
56467ebf 162 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
d38ceaf9
AD
163 *
164 * @vm: vm providing the BOs
3c0eea6c 165 * @validated: head of validation list
56467ebf 166 * @entry: entry to add
d38ceaf9
AD
167 *
168 * Add the page directory to the list of BOs to
56467ebf 169 * validate for command submission.
d38ceaf9 170 */
56467ebf
CK
171void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
172 struct list_head *validated,
173 struct amdgpu_bo_list_entry *entry)
d38ceaf9 174{
3f3333f8 175 entry->robj = vm->root.base.bo;
56467ebf 176 entry->priority = 0;
67003a15 177 entry->tv.bo = &entry->robj->tbo;
56467ebf 178 entry->tv.shared = true;
2f568dbd 179 entry->user_pages = NULL;
56467ebf
CK
180 list_add(&entry->tv.head, validated);
181}
d38ceaf9 182
670fecc8 183/**
f7da30d9 184 * amdgpu_vm_validate_pt_bos - validate the page table BOs
670fecc8 185 *
5a712a87 186 * @adev: amdgpu device pointer
56467ebf 187 * @vm: vm providing the BOs
670fecc8
CK
188 * @validate: callback to do the validation
189 * @param: parameter for the validation callback
190 *
191 * Validate the page table BOs on command submission if necessary.
192 */
f7da30d9
CK
193int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
194 int (*validate)(void *p, struct amdgpu_bo *bo),
195 void *param)
670fecc8 196{
3f3333f8 197 struct ttm_bo_global *glob = adev->mman.bdev.glob;
670fecc8
CK
198 int r;
199
3f3333f8
CK
200 spin_lock(&vm->status_lock);
201 while (!list_empty(&vm->evicted)) {
202 struct amdgpu_vm_bo_base *bo_base;
203 struct amdgpu_bo *bo;
670fecc8 204
3f3333f8
CK
205 bo_base = list_first_entry(&vm->evicted,
206 struct amdgpu_vm_bo_base,
207 vm_status);
208 spin_unlock(&vm->status_lock);
670fecc8 209
3f3333f8
CK
210 bo = bo_base->bo;
211 BUG_ON(!bo);
212 if (bo->parent) {
213 r = validate(param, bo);
214 if (r)
215 return r;
670fecc8 216
3f3333f8
CK
217 spin_lock(&glob->lru_lock);
218 ttm_bo_move_to_lru_tail(&bo->tbo);
219 if (bo->shadow)
220 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
221 spin_unlock(&glob->lru_lock);
222 }
670fecc8 223
73fb16e7
CK
224 if (bo->tbo.type == ttm_bo_type_kernel &&
225 vm->use_cpu_for_update) {
3f3333f8
CK
226 r = amdgpu_bo_kmap(bo, NULL);
227 if (r)
228 return r;
229 }
b6369225 230
3f3333f8 231 spin_lock(&vm->status_lock);
73fb16e7
CK
232 if (bo->tbo.type != ttm_bo_type_kernel)
233 list_move(&bo_base->vm_status, &vm->moved);
234 else
235 list_move(&bo_base->vm_status, &vm->relocated);
670fecc8 236 }
3f3333f8 237 spin_unlock(&vm->status_lock);
670fecc8 238
34d7be5d 239 return 0;
670fecc8
CK
240}
241
56467ebf 242/**
34d7be5d 243 * amdgpu_vm_ready - check VM is ready for updates
56467ebf 244 *
34d7be5d 245 * @vm: VM to check
d38ceaf9 246 *
34d7be5d 247 * Check if all VM PDs/PTs are ready for updates
d38ceaf9 248 */
3f3333f8 249bool amdgpu_vm_ready(struct amdgpu_vm *vm)
d38ceaf9 250{
3f3333f8 251 bool ready;
d38ceaf9 252
3f3333f8
CK
253 spin_lock(&vm->status_lock);
254 ready = list_empty(&vm->evicted);
255 spin_unlock(&vm->status_lock);
5a712a87 256
3f3333f8 257 return ready;
d711e139
CK
258}
259
13307f7e
CK
260/**
261 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
262 *
263 * @adev: amdgpu_device pointer
264 * @bo: BO to clear
265 * @level: level this BO is at
266 *
267 * Root PD needs to be reserved when calling this.
268 */
269static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
4584312d
CK
270 struct amdgpu_vm *vm, struct amdgpu_bo *bo,
271 unsigned level, bool pte_support_ats)
13307f7e
CK
272{
273 struct ttm_operation_ctx ctx = { true, false };
274 struct dma_fence *fence = NULL;
4584312d 275 unsigned entries, ats_entries;
13307f7e
CK
276 struct amdgpu_ring *ring;
277 struct amdgpu_job *job;
4584312d 278 uint64_t addr;
13307f7e
CK
279 int r;
280
4584312d
CK
281 addr = amdgpu_bo_gpu_offset(bo);
282 entries = amdgpu_bo_size(bo) / 8;
283
284 if (pte_support_ats) {
285 if (level == adev->vm_manager.root_level) {
286 ats_entries = amdgpu_vm_level_shift(adev, level);
287 ats_entries += AMDGPU_GPU_PAGE_SHIFT;
288 ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
289 ats_entries = min(ats_entries, entries);
290 entries -= ats_entries;
291 } else {
292 ats_entries = entries;
293 entries = 0;
294 }
13307f7e 295 } else {
4584312d 296 ats_entries = 0;
13307f7e
CK
297 }
298
299 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
300
301 r = reservation_object_reserve_shared(bo->tbo.resv);
302 if (r)
303 return r;
304
305 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
306 if (r)
307 goto error;
308
13307f7e
CK
309 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
310 if (r)
311 goto error;
312
4584312d
CK
313 if (ats_entries) {
314 uint64_t ats_value;
315
316 ats_value = AMDGPU_PTE_DEFAULT_ATC;
317 if (level != AMDGPU_VM_PTB)
318 ats_value |= AMDGPU_PDE_PTE;
319
320 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
321 ats_entries, 0, ats_value);
322 addr += ats_entries * 8;
323 }
324
325 if (entries)
326 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
327 entries, 0, 0);
328
13307f7e
CK
329 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
330
331 WARN_ON(job->ibs[0].length_dw > 64);
332 r = amdgpu_job_submit(job, ring, &vm->entity,
333 AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
334 if (r)
335 goto error_free;
336
337 amdgpu_bo_fence(bo, fence, true);
338 dma_fence_put(fence);
339 return 0;
340
341error_free:
342 amdgpu_job_free(job);
343
344error:
345 return r;
346}
347
d711e139 348/**
f566ceb1
CK
349 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
350 *
351 * @adev: amdgpu_device pointer
352 * @vm: requested vm
353 * @saddr: start of the address range
354 * @eaddr: end of the address range
355 *
356 * Make sure the page directories and page tables are allocated
357 */
358static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
359 struct amdgpu_vm *vm,
360 struct amdgpu_vm_pt *parent,
361 uint64_t saddr, uint64_t eaddr,
4584312d 362 unsigned level, bool ats)
f566ceb1 363{
50783147 364 unsigned shift = amdgpu_vm_level_shift(adev, level);
f566ceb1 365 unsigned pt_idx, from, to;
3c824172 366 u64 flags;
13307f7e 367 int r;
f566ceb1
CK
368
369 if (!parent->entries) {
370 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
371
2098105e
MH
372 parent->entries = kvmalloc_array(num_entries,
373 sizeof(struct amdgpu_vm_pt),
374 GFP_KERNEL | __GFP_ZERO);
f566ceb1
CK
375 if (!parent->entries)
376 return -ENOMEM;
377 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
378 }
379
1866bac8
FK
380 from = saddr >> shift;
381 to = eaddr >> shift;
382 if (from >= amdgpu_vm_num_entries(adev, level) ||
383 to >= amdgpu_vm_num_entries(adev, level))
384 return -EINVAL;
f566ceb1 385
f566ceb1 386 ++level;
1866bac8
FK
387 saddr = saddr & ((1 << shift) - 1);
388 eaddr = eaddr & ((1 << shift) - 1);
f566ceb1 389
13307f7e 390 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
3c824172
HK
391 if (vm->use_cpu_for_update)
392 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
393 else
394 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
395 AMDGPU_GEM_CREATE_SHADOW);
396
f566ceb1
CK
397 /* walk over the address space and allocate the page tables */
398 for (pt_idx = from; pt_idx <= to; ++pt_idx) {
3f3333f8 399 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
f566ceb1
CK
400 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
401 struct amdgpu_bo *pt;
402
3f3333f8 403 if (!entry->base.bo) {
f566ceb1
CK
404 r = amdgpu_bo_create(adev,
405 amdgpu_vm_bo_size(adev, level),
406 AMDGPU_GPU_PAGE_SIZE, true,
13307f7e 407 AMDGPU_GEM_DOMAIN_VRAM, flags,
8febe617 408 NULL, resv, &pt);
f566ceb1
CK
409 if (r)
410 return r;
411
4584312d 412 r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
13307f7e
CK
413 if (r) {
414 amdgpu_bo_unref(&pt);
415 return r;
416 }
417
0a096fb6
CK
418 if (vm->use_cpu_for_update) {
419 r = amdgpu_bo_kmap(pt, NULL);
420 if (r) {
421 amdgpu_bo_unref(&pt);
422 return r;
423 }
424 }
425
f566ceb1
CK
426 /* Keep a reference to the root directory to avoid
427 * freeing them up in the wrong order.
428 */
0f2fc435 429 pt->parent = amdgpu_bo_ref(parent->base.bo);
f566ceb1 430
3f3333f8
CK
431 entry->base.vm = vm;
432 entry->base.bo = pt;
433 list_add_tail(&entry->base.bo_list, &pt->va);
ea09729c
CK
434 spin_lock(&vm->status_lock);
435 list_add(&entry->base.vm_status, &vm->relocated);
436 spin_unlock(&vm->status_lock);
f566ceb1
CK
437 }
438
196f7489 439 if (level < AMDGPU_VM_PTB) {
1866bac8
FK
440 uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
441 uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
442 ((1 << shift) - 1);
443 r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
4584312d 444 sub_eaddr, level, ats);
f566ceb1
CK
445 if (r)
446 return r;
447 }
448 }
449
450 return 0;
451}
452
663e4577
CK
453/**
454 * amdgpu_vm_alloc_pts - Allocate page tables.
455 *
456 * @adev: amdgpu_device pointer
457 * @vm: VM to allocate page tables for
458 * @saddr: Start address which needs to be allocated
459 * @size: Size from start address we need.
460 *
461 * Make sure the page tables are allocated.
462 */
463int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
464 struct amdgpu_vm *vm,
465 uint64_t saddr, uint64_t size)
466{
663e4577 467 uint64_t eaddr;
4584312d 468 bool ats = false;
663e4577
CK
469
470 /* validate the parameters */
471 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
472 return -EINVAL;
473
474 eaddr = saddr + size - 1;
4584312d
CK
475
476 if (vm->pte_support_ats)
477 ats = saddr < AMDGPU_VA_HOLE_START;
663e4577
CK
478
479 saddr /= AMDGPU_GPU_PAGE_SIZE;
480 eaddr /= AMDGPU_GPU_PAGE_SIZE;
481
4584312d
CK
482 if (eaddr >= adev->vm_manager.max_pfn) {
483 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
484 eaddr, adev->vm_manager.max_pfn);
485 return -EINVAL;
486 }
487
196f7489 488 return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
4584312d 489 adev->vm_manager.root_level, ats);
663e4577
CK
490}
491
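
A hedged, stand-alone restatement of the range checks above (not driver code): it assumes the usual 4 KiB AMDGPU_GPU_PAGE_SIZE and shows why both saddr and size must be GPU-page aligned before the range is converted to page frame numbers.

#include <stdio.h>
#include <stdint.h>

#define GPU_PAGE_SIZE 4096ULL			/* assumed AMDGPU_GPU_PAGE_SIZE */
#define GPU_PAGE_MASK (GPU_PAGE_SIZE - 1)	/* assumed AMDGPU_GPU_PAGE_MASK */

/* returns 0 if page tables can be allocated for [saddr, saddr + size) */
static int check_range(uint64_t saddr, uint64_t size, uint64_t max_pfn)
{
	uint64_t eaddr;

	if ((saddr & GPU_PAGE_MASK) || (size & GPU_PAGE_MASK))
		return -1;			/* unaligned, like -EINVAL above */

	eaddr = saddr + size - 1;		/* last byte of the range */
	saddr /= GPU_PAGE_SIZE;			/* first page frame number */
	eaddr /= GPU_PAGE_SIZE;			/* last page frame number */

	if (eaddr >= max_pfn)
		return -1;			/* va above limit */

	printf("page tables needed for pfn %llu..%llu\n",
	       (unsigned long long)saddr, (unsigned long long)eaddr);
	return 0;
}

int main(void)
{
	check_range(0x100000, 0x200000, 1ULL << 36);	/* 2 MiB mapped at 1 MiB */
	return 0;
}
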
e59c0205
AX
492/**
493 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
494 *
495 * @adev: amdgpu_device pointer
496 */
497void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
93dcc37d 498{
a1255107 499 const struct amdgpu_ip_block *ip_block;
e59c0205
AX
500 bool has_compute_vm_bug;
501 struct amdgpu_ring *ring;
502 int i;
93dcc37d 503
e59c0205 504 has_compute_vm_bug = false;
93dcc37d 505
2990a1fc 506 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
e59c0205
AX
507 if (ip_block) {
508 /* Compute has a VM bug for GFX version < 7.
509 * Compute has a VM bug for GFX 8 MEC firmware version < 673. */
510 if (ip_block->version->major <= 7)
511 has_compute_vm_bug = true;
512 else if (ip_block->version->major == 8)
513 if (adev->gfx.mec_fw_version < 673)
514 has_compute_vm_bug = true;
515 }
93dcc37d 516
e59c0205
AX
517 for (i = 0; i < adev->num_rings; i++) {
518 ring = adev->rings[i];
519 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
520 /* only compute rings */
521 ring->has_compute_vm_bug = has_compute_vm_bug;
93dcc37d 522 else
e59c0205 523 ring->has_compute_vm_bug = false;
93dcc37d 524 }
93dcc37d
AD
525}
526
b9bf33d5
CZ
527bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
528 struct amdgpu_job *job)
e60f8db5 529{
b9bf33d5
CZ
530 struct amdgpu_device *adev = ring->adev;
531 unsigned vmhub = ring->funcs->vmhub;
620f774f
CK
532 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
533 struct amdgpu_vmid *id;
b9bf33d5 534 bool gds_switch_needed;
e59c0205 535 bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
b9bf33d5 536
c4f46f22 537 if (job->vmid == 0)
b9bf33d5 538 return false;
c4f46f22 539 id = &id_mgr->ids[job->vmid];
b9bf33d5
CZ
540 gds_switch_needed = ring->funcs->emit_gds_switch && (
541 id->gds_base != job->gds_base ||
542 id->gds_size != job->gds_size ||
543 id->gws_base != job->gws_base ||
544 id->gws_size != job->gws_size ||
545 id->oa_base != job->oa_base ||
546 id->oa_size != job->oa_size);
e60f8db5 547
620f774f 548 if (amdgpu_vmid_had_gpu_reset(adev, id))
b9bf33d5 549 return true;
e60f8db5 550
bb37b67d 551 return vm_flush_needed || gds_switch_needed;
b9bf33d5
CZ
552}
553
9a4b7d4c
HK
554static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
555{
770d13b1 556 return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
e60f8db5
AX
557}
558
d38ceaf9
AD
559/**
560 * amdgpu_vm_flush - hardware flush the vm
561 *
562 * @ring: ring to use for flush
c4f46f22 563 * @vmid: vmid number to use
4ff37a83 564 * @pd_addr: address of the page directory
d38ceaf9 565 *
4ff37a83 566 * Emit a VM flush when it is necessary.
d38ceaf9 567 */
8fdf074f 568int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
d38ceaf9 569{
971fe9a9 570 struct amdgpu_device *adev = ring->adev;
7645670d 571 unsigned vmhub = ring->funcs->vmhub;
620f774f 572 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
c4f46f22 573 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
d564a06e 574 bool gds_switch_needed = ring->funcs->emit_gds_switch && (
fd53be30
CZ
575 id->gds_base != job->gds_base ||
576 id->gds_size != job->gds_size ||
577 id->gws_base != job->gws_base ||
578 id->gws_size != job->gws_size ||
579 id->oa_base != job->oa_base ||
580 id->oa_size != job->oa_size);
de37e68a 581 bool vm_flush_needed = job->vm_needs_flush;
c0e51931 582 unsigned patch_offset = 0;
41d9eb2c 583 int r;
d564a06e 584
620f774f 585 if (amdgpu_vmid_had_gpu_reset(adev, id)) {
f7d015b9
CK
586 gds_switch_needed = true;
587 vm_flush_needed = true;
588 }
971fe9a9 589
8fdf074f 590 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
f7d015b9 591 return 0;
41d9eb2c 592
c0e51931
CK
593 if (ring->funcs->init_cond_exec)
594 patch_offset = amdgpu_ring_init_cond_exec(ring);
41d9eb2c 595
8fdf074f
ML
596 if (need_pipe_sync)
597 amdgpu_ring_emit_pipeline_sync(ring);
598
f7d015b9 599 if (ring->funcs->emit_vm_flush && vm_flush_needed) {
c0e51931 600 struct dma_fence *fence;
41d9eb2c 601
c4f46f22 602 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
5a4633c4
CK
603 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->pasid,
604 job->vm_pd_addr);
e9d672b2 605
c0e51931
CK
606 r = amdgpu_fence_emit(ring, &fence);
607 if (r)
608 return r;
e9d672b2 609
7645670d 610 mutex_lock(&id_mgr->lock);
c0e51931
CK
611 dma_fence_put(id->last_flush);
612 id->last_flush = fence;
bea39672 613 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
7645670d 614 mutex_unlock(&id_mgr->lock);
c0e51931 615 }
e9d672b2 616
7c4378f4 617 if (ring->funcs->emit_gds_switch && gds_switch_needed) {
c0e51931
CK
618 id->gds_base = job->gds_base;
619 id->gds_size = job->gds_size;
620 id->gws_base = job->gws_base;
621 id->gws_size = job->gws_size;
622 id->oa_base = job->oa_base;
623 id->oa_size = job->oa_size;
c4f46f22 624 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
c0e51931
CK
625 job->gds_size, job->gws_base,
626 job->gws_size, job->oa_base,
627 job->oa_size);
628 }
629
630 if (ring->funcs->patch_cond_exec)
631 amdgpu_ring_patch_cond_exec(ring, patch_offset);
632
633 /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
634 if (ring->funcs->emit_switch_buffer) {
635 amdgpu_ring_emit_switch_buffer(ring);
636 amdgpu_ring_emit_switch_buffer(ring);
e9d672b2 637 }
41d9eb2c 638 return 0;
971fe9a9
CK
639}
640
d38ceaf9
AD
641/**
642 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
643 *
644 * @vm: requested vm
645 * @bo: requested buffer object
646 *
8843dbbb 647 * Find @bo inside the requested vm.
d38ceaf9
AD
648 * Search inside the @bo's vm list for the requested vm
649 * Returns the found bo_va or NULL if none is found
650 *
651 * Object has to be reserved!
652 */
653struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
654 struct amdgpu_bo *bo)
655{
656 struct amdgpu_bo_va *bo_va;
657
ec681545
CK
658 list_for_each_entry(bo_va, &bo->va, base.bo_list) {
659 if (bo_va->base.vm == vm) {
d38ceaf9
AD
660 return bo_va;
661 }
662 }
663 return NULL;
664}
665
666/**
afef8b8f 667 * amdgpu_vm_do_set_ptes - helper to call the right asic function
d38ceaf9 668 *
29efc4f5 669 * @params: see amdgpu_pte_update_params definition
373ac645 670 * @bo: PD/PT to update
d38ceaf9
AD
671 * @pe: addr of the page entry
672 * @addr: dst addr to write into pe
673 * @count: number of page entries to update
674 * @incr: increase next addr by incr bytes
675 * @flags: hw access flags
d38ceaf9
AD
676 *
677 * Traces the parameters and calls the right asic functions
678 * to setup the page table using the DMA.
679 */
afef8b8f 680static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
373ac645 681 struct amdgpu_bo *bo,
afef8b8f
CK
682 uint64_t pe, uint64_t addr,
683 unsigned count, uint32_t incr,
6b777607 684 uint64_t flags)
d38ceaf9 685{
373ac645 686 pe += amdgpu_bo_gpu_offset(bo);
ec2f05f0 687 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
d38ceaf9 688
afef8b8f 689 if (count < 3) {
de9ea7bd
CK
690 amdgpu_vm_write_pte(params->adev, params->ib, pe,
691 addr | flags, count, incr);
d38ceaf9
AD
692
693 } else {
27c5f36f 694 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
d38ceaf9
AD
695 count, incr, flags);
696 }
697}
698
afef8b8f
CK
699/**
700 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
701 *
702 * @params: see amdgpu_pte_update_params definition
373ac645 703 * @bo: PD/PT to update
afef8b8f
CK
704 * @pe: addr of the page entry
705 * @addr: dst addr to write into pe
706 * @count: number of page entries to update
707 * @incr: increase next addr by incr bytes
708 * @flags: hw access flags
709 *
710 * Traces the parameters and calls the DMA function to copy the PTEs.
711 */
712static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
373ac645 713 struct amdgpu_bo *bo,
afef8b8f
CK
714 uint64_t pe, uint64_t addr,
715 unsigned count, uint32_t incr,
6b777607 716 uint64_t flags)
afef8b8f 717{
ec2f05f0 718 uint64_t src = (params->src + (addr >> 12) * 8);
afef8b8f 719
373ac645 720 pe += amdgpu_bo_gpu_offset(bo);
ec2f05f0
CK
721 trace_amdgpu_vm_copy_ptes(pe, src, count);
722
723 amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
afef8b8f
CK
724}
725
d38ceaf9 726/**
b07c9d2a 727 * amdgpu_vm_map_gart - Resolve gart mapping of addr
d38ceaf9 728 *
b07c9d2a 729 * @pages_addr: optional DMA address to use for lookup
d38ceaf9
AD
730 * @addr: the unmapped addr
731 *
732 * Look up the physical address of the page that the pte resolves
b07c9d2a 733 * to and return the pointer for the page table entry.
d38ceaf9 734 */
de9ea7bd 735static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
d38ceaf9
AD
736{
737 uint64_t result;
738
de9ea7bd
CK
739 /* page table offset */
740 result = pages_addr[addr >> PAGE_SHIFT];
b07c9d2a 741
de9ea7bd
CK
742 /* in case cpu page size != gpu page size*/
743 result |= addr & (~PAGE_MASK);
d38ceaf9 744
b07c9d2a 745 result &= 0xFFFFFFFFFFFFF000ULL;
d38ceaf9
AD
746
747 return result;
748}
749
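
The gart lookup above can be exercised in isolation; the sketch below is not driver code, stands a small local array in for the DMA address table and assumes 4 KiB CPU pages. The OR/AND pair only matters when the CPU page size is larger than the 4 KiB GPU page: it keeps the GPU-page-aligned offset inside the CPU page while stripping the low 12 bits.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB CPU pages */
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static uint64_t map_gart(const uint64_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	result = pages_addr[addr >> PAGE_SHIFT];	/* DMA address of the CPU page */
	result |= addr & ~PAGE_MASK;			/* sub-page offset (CPU page > GPU page) */
	result &= 0xFFFFFFFFFFFFF000ULL;		/* keep only GPU-page-aligned bits */

	return result;
}

int main(void)
{
	/* two CPU pages that are not physically contiguous */
	uint64_t pages_addr[2] = { 0x12345000ULL, 0xabcde000ULL };

	printf("0x%llx\n", (unsigned long long)map_gart(pages_addr, 0x1234));
	return 0;
}
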
3c824172
HK
750/**
751 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
752 *
753 * @params: see amdgpu_pte_update_params definition
373ac645 754 * @bo: PD/PT to update
3c824172
HK
755 * @pe: kmap addr of the page entry
756 * @addr: dst addr to write into pe
757 * @count: number of page entries to update
758 * @incr: increase next addr by incr bytes
759 * @flags: hw access flags
760 *
761 * Write count number of PT/PD entries directly.
762 */
763static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
373ac645 764 struct amdgpu_bo *bo,
3c824172
HK
765 uint64_t pe, uint64_t addr,
766 unsigned count, uint32_t incr,
767 uint64_t flags)
768{
769 unsigned int i;
b4d42511 770 uint64_t value;
3c824172 771
373ac645
CK
772 pe += (unsigned long)amdgpu_bo_kptr(bo);
773
03918b36
CK
774 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
775
3c824172 776 for (i = 0; i < count; i++) {
b4d42511
HK
777 value = params->pages_addr ?
778 amdgpu_vm_map_gart(params->pages_addr, addr) :
779 addr;
132f34e4
CK
780 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
781 i, value, flags);
3c824172
HK
782 addr += incr;
783 }
3c824172
HK
784}
785
a33cab7a
CK
786static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
787 void *owner)
3c824172
HK
788{
789 struct amdgpu_sync sync;
790 int r;
791
792 amdgpu_sync_create(&sync);
177ae09b 793 amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
3c824172
HK
794 r = amdgpu_sync_wait(&sync, true);
795 amdgpu_sync_free(&sync);
796
797 return r;
798}
799
f8991bab 800/*
6989f246 801 * amdgpu_vm_update_pde - update a single level in the hierarchy
f8991bab 802 *
6989f246 803 * @param: parameters for the update
f8991bab 804 * @vm: requested vm
194d2161 805 * @parent: parent directory
6989f246 806 * @entry: entry to update
f8991bab 807 *
6989f246 808 * Makes sure the requested entry in parent is up to date.
f8991bab 809 */
6989f246
CK
810static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
811 struct amdgpu_vm *vm,
812 struct amdgpu_vm_pt *parent,
813 struct amdgpu_vm_pt *entry)
d38ceaf9 814{
373ac645 815 struct amdgpu_bo *bo = parent->base.bo, *pbo;
3de676d8
CK
816 uint64_t pde, pt, flags;
817 unsigned level;
d5fc5e82 818
6989f246
CK
819 /* Don't update huge pages here */
820 if (entry->huge)
821 return;
d38ceaf9 822
373ac645 823 for (level = 0, pbo = bo->parent; pbo; ++level)
3de676d8
CK
824 pbo = pbo->parent;
825
196f7489 826 level += params->adev->vm_manager.root_level;
373ac645 827 pt = amdgpu_bo_gpu_offset(entry->base.bo);
3de676d8 828 flags = AMDGPU_PTE_VALID;
132f34e4 829 amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
373ac645
CK
830 pde = (entry - parent->entries) * 8;
831 if (bo->shadow)
832 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
833 params->func(params, bo, pde, pt, 1, 0, flags);
d38ceaf9
AD
834}
835
92456b93
CK
836/*
837 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
838 *
839 * @parent: parent PD
840 *
841 * Mark all PD levels as invalid after an error.
842 */
8f19cd78
CK
843static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
844 struct amdgpu_vm *vm,
845 struct amdgpu_vm_pt *parent,
846 unsigned level)
92456b93 847{
8f19cd78 848 unsigned pt_idx, num_entries;
92456b93
CK
849
850 /*
851 * Recurse into the subdirectories. This recursion is harmless because
852 * we only have a maximum of 5 layers.
853 */
8f19cd78
CK
854 num_entries = amdgpu_vm_num_entries(adev, level);
855 for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
92456b93
CK
856 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
857
3f3333f8 858 if (!entry->base.bo)
92456b93
CK
859 continue;
860
ea09729c 861 spin_lock(&vm->status_lock);
481c2e94
CK
862 if (list_empty(&entry->base.vm_status))
863 list_add(&entry->base.vm_status, &vm->relocated);
ea09729c 864 spin_unlock(&vm->status_lock);
8f19cd78 865 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
92456b93
CK
866 }
867}
868
194d2161
CK
869/*
870 * amdgpu_vm_update_directories - make sure that all directories are valid
871 *
872 * @adev: amdgpu_device pointer
873 * @vm: requested vm
874 *
875 * Makes sure all directories are up to date.
876 * Returns 0 for success, error for failure.
877 */
878int amdgpu_vm_update_directories(struct amdgpu_device *adev,
879 struct amdgpu_vm *vm)
880{
6989f246
CK
881 struct amdgpu_pte_update_params params;
882 struct amdgpu_job *job;
883 unsigned ndw = 0;
78aa02c7 884 int r = 0;
92456b93 885
6989f246
CK
886 if (list_empty(&vm->relocated))
887 return 0;
888
889restart:
890 memset(&params, 0, sizeof(params));
891 params.adev = adev;
892
893 if (vm->use_cpu_for_update) {
894 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
895 if (unlikely(r))
896 return r;
897
898 params.func = amdgpu_vm_cpu_set_ptes;
899 } else {
900 ndw = 512 * 8;
901 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
902 if (r)
903 return r;
904
905 params.ib = &job->ibs[0];
906 params.func = amdgpu_vm_do_set_ptes;
907 }
908
ea09729c
CK
909 spin_lock(&vm->status_lock);
910 while (!list_empty(&vm->relocated)) {
6989f246
CK
911 struct amdgpu_vm_bo_base *bo_base, *parent;
912 struct amdgpu_vm_pt *pt, *entry;
ea09729c
CK
913 struct amdgpu_bo *bo;
914
915 bo_base = list_first_entry(&vm->relocated,
916 struct amdgpu_vm_bo_base,
917 vm_status);
6989f246 918 list_del_init(&bo_base->vm_status);
ea09729c
CK
919 spin_unlock(&vm->status_lock);
920
921 bo = bo_base->bo->parent;
6989f246 922 if (!bo) {
ea09729c 923 spin_lock(&vm->status_lock);
6989f246 924 continue;
ea09729c 925 }
6989f246
CK
926
927 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
928 bo_list);
929 pt = container_of(parent, struct amdgpu_vm_pt, base);
930 entry = container_of(bo_base, struct amdgpu_vm_pt, base);
931
932 amdgpu_vm_update_pde(&params, vm, pt, entry);
933
934 spin_lock(&vm->status_lock);
935 if (!vm->use_cpu_for_update &&
936 (ndw - params.ib->length_dw) < 32)
937 break;
ea09729c
CK
938 }
939 spin_unlock(&vm->status_lock);
92456b93 940
68c62306
CK
941 if (vm->use_cpu_for_update) {
942 /* Flush HDP */
943 mb();
69882565 944 amdgpu_asic_flush_hdp(adev, NULL);
6989f246
CK
945 } else if (params.ib->length_dw == 0) {
946 amdgpu_job_free(job);
947 } else {
948 struct amdgpu_bo *root = vm->root.base.bo;
949 struct amdgpu_ring *ring;
950 struct dma_fence *fence;
951
952 ring = container_of(vm->entity.sched, struct amdgpu_ring,
953 sched);
954
955 amdgpu_ring_pad_ib(ring, params.ib);
956 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
957 AMDGPU_FENCE_OWNER_VM, false);
6989f246
CK
958 WARN_ON(params.ib->length_dw > ndw);
959 r = amdgpu_job_submit(job, ring, &vm->entity,
960 AMDGPU_FENCE_OWNER_VM, &fence);
961 if (r)
962 goto error;
963
964 amdgpu_bo_fence(root, fence, true);
965 dma_fence_put(vm->last_update);
966 vm->last_update = fence;
68c62306
CK
967 }
968
6989f246
CK
969 if (!list_empty(&vm->relocated))
970 goto restart;
971
972 return 0;
973
974error:
196f7489
CZ
975 amdgpu_vm_invalidate_level(adev, vm, &vm->root,
976 adev->vm_manager.root_level);
6989f246 977 amdgpu_job_free(job);
92456b93 978 return r;
194d2161
CK
979}
980
4e2cb640 981/**
cf2f0a37 982 * amdgpu_vm_find_entry - find the entry for an address
4e2cb640
CK
983 *
984 * @p: see amdgpu_pte_update_params definition
985 * @addr: virtual address in question
cf2f0a37
AD
986 * @entry: resulting entry or NULL
987 * @parent: parent entry
4e2cb640 988 *
cf2f0a37 989 * Find the vm_pt entry and it's parent for the given address.
4e2cb640 990 */
cf2f0a37
AD
991void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
992 struct amdgpu_vm_pt **entry,
993 struct amdgpu_vm_pt **parent)
4e2cb640 994{
196f7489 995 unsigned level = p->adev->vm_manager.root_level;
4e2cb640 996
cf2f0a37
AD
997 *parent = NULL;
998 *entry = &p->vm->root;
999 while ((*entry)->entries) {
e3a1b32a 1000 unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
50783147 1001
cf2f0a37 1002 *parent = *entry;
e3a1b32a
CK
1003 *entry = &(*entry)->entries[addr >> shift];
1004 addr &= (1ULL << shift) - 1;
4e2cb640
CK
1005 }
1006
196f7489 1007 if (level != AMDGPU_VM_PTB)
cf2f0a37
AD
1008 *entry = NULL;
1009}
1010
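
Not driver code, but the same walk reduced to its address arithmetic: a hedged sketch that decomposes one GPU page frame number into per-level indices, assuming the Vega-style shifts of 27/18/9/0 bits (block_size = 9, four levels).

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned shifts[] = { 27, 18, 9, 0 };	/* assumed PDB2, PDB1, PDB0, PTB */
	uint64_t pfn = 0x123456789ULL;			/* GPU page frame number to resolve */

	for (unsigned level = 0; level < 4; ++level) {
		unsigned shift = shifts[level];

		printf("level %u: entry index %llu\n", level,
		       (unsigned long long)(pfn >> shift));
		pfn &= (1ULL << shift) - 1;		/* keep the bits below this level */
	}
	return 0;
}
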
1011/**
1012 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1013 *
1014 * @p: see amdgpu_pte_update_params definition
1015 * @entry: vm_pt entry to check
1016 * @parent: parent entry
1017 * @nptes: number of PTEs updated with this operation
1018 * @dst: destination address where the PTEs should point to
1019 * @flags: access flags for the PTEs
1020 *
1021 * Check if we can update the PD with a huge page.
1022 */
ec5207c9
CK
1023static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1024 struct amdgpu_vm_pt *entry,
1025 struct amdgpu_vm_pt *parent,
1026 unsigned nptes, uint64_t dst,
1027 uint64_t flags)
cf2f0a37 1028{
373ac645 1029 uint64_t pde;
cf2f0a37
AD
1030
1031 /* In the case of a mixed PT the PDE must point to it*/
3cc1d3ea
CK
1032 if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1033 nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
4ab4016a 1034 /* Set the huge page flag to stop scanning at this PDE */
cf2f0a37
AD
1035 flags |= AMDGPU_PDE_PTE;
1036 }
1037
3cc1d3ea
CK
1038 if (!(flags & AMDGPU_PDE_PTE)) {
1039 if (entry->huge) {
1040 /* Add the entry to the relocated list to update it. */
1041 entry->huge = false;
1042 spin_lock(&p->vm->status_lock);
1043 list_move(&entry->base.vm_status, &p->vm->relocated);
1044 spin_unlock(&p->vm->status_lock);
1045 }
ec5207c9 1046 return;
3cc1d3ea 1047 }
cf2f0a37 1048
3cc1d3ea 1049 entry->huge = true;
132f34e4 1050 amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
3de676d8 1051
373ac645
CK
1052 pde = (entry - parent->entries) * 8;
1053 if (parent->base.bo->shadow)
1054 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1055 p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
4e2cb640
CK
1056}
1057
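
As a hedged aside (not driver code), the eligibility test boils down to two conditions once the ASIC-generation check is left out: the update must not be a copy from system memory and it must cover a complete page table, so the parent PDE can map the whole block directly. Assuming 512 PTEs per table and 4 KiB GPU pages, that block is 2 MiB.

#include <stdbool.h>
#include <stdio.h>

#define PTE_COUNT 512u		/* assumed AMDGPU_VM_PTE_COUNT */

static bool can_use_huge_pde(unsigned nptes, bool copy_from_sysmem)
{
	/* params->src (system memory copy) never qualifies, and the chunk
	 * must span the whole page table for the PDE to take over */
	return !copy_from_sysmem && nptes == PTE_COUNT;
}

int main(void)
{
	printf("%d %d\n",
	       can_use_huge_pde(512, false),	/* full table, VRAM backed: yes */
	       can_use_huge_pde(200, false));	/* partial table: no */
	return 0;
}
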
d38ceaf9
AD
1058/**
1059 * amdgpu_vm_update_ptes - make sure that page tables are valid
1060 *
29efc4f5 1061 * @params: see amdgpu_pte_update_params definition
d38ceaf9
AD
1062 * @vm: requested vm
1063 * @start: start of GPU address range
1064 * @end: end of GPU address range
677131a1 1065 * @dst: destination address to map to, the next dst inside the function
d38ceaf9
AD
1066 * @flags: mapping flags
1067 *
8843dbbb 1068 * Update the page tables in the range @start - @end.
cc28c4ed 1069 * Returns 0 for success, -EINVAL for failure.
d38ceaf9 1070 */
cc28c4ed 1071static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
a1e08d3b 1072 uint64_t start, uint64_t end,
6b777607 1073 uint64_t dst, uint64_t flags)
d38ceaf9 1074{
36b32a68
ZJ
1075 struct amdgpu_device *adev = params->adev;
1076 const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
31f6c1fe 1077
301654a4 1078 uint64_t addr, pe_start;
21718497 1079 struct amdgpu_bo *pt;
301654a4 1080 unsigned nptes;
d38ceaf9
AD
1081
1082 /* walk over the address space and update the page tables */
cf2f0a37
AD
1083 for (addr = start; addr < end; addr += nptes,
1084 dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1085 struct amdgpu_vm_pt *entry, *parent;
1086
1087 amdgpu_vm_get_entry(params, addr, &entry, &parent);
1088 if (!entry)
1089 return -ENOENT;
4e2cb640 1090
d38ceaf9
AD
1091 if ((addr & ~mask) == (end & ~mask))
1092 nptes = end - addr;
1093 else
36b32a68 1094 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
d38ceaf9 1095
ec5207c9
CK
1096 amdgpu_vm_handle_huge_pages(params, entry, parent,
1097 nptes, dst, flags);
4ab4016a 1098 /* We don't need to update PTEs for huge pages */
78eb2f0c 1099 if (entry->huge)
cf2f0a37
AD
1100 continue;
1101
3f3333f8 1102 pt = entry->base.bo;
373ac645
CK
1103 pe_start = (addr & mask) * 8;
1104 if (pt->shadow)
1105 params->func(params, pt->shadow, pe_start, dst, nptes,
1106 AMDGPU_GPU_PAGE_SIZE, flags);
1107 params->func(params, pt, pe_start, dst, nptes,
301654a4 1108 AMDGPU_GPU_PAGE_SIZE, flags);
d38ceaf9
AD
1109 }
1110
cc28c4ed 1111 return 0;
92696dd5
CK
1112}
1113
1114/*
1115 * amdgpu_vm_frag_ptes - add fragment information to PTEs
1116 *
1117 * @params: see amdgpu_pte_update_params definition
1118 * @vm: requested vm
1119 * @start: first PTE to handle
1120 * @end: last PTE to handle
1121 * @dst: addr those PTEs should point to
1122 * @flags: hw mapping flags
cc28c4ed 1123 * Returns 0 for success, -EINVAL for failure.
92696dd5 1124 */
cc28c4ed 1125static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
92696dd5 1126 uint64_t start, uint64_t end,
6b777607 1127 uint64_t dst, uint64_t flags)
92696dd5
CK
1128{
1129 /**
1130 * The MC L1 TLB supports variable sized pages, based on a fragment
1131 * field in the PTE. When this field is set to a non-zero value, page
1132 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1133 * flags are considered valid for all PTEs within the fragment range
1134 * and corresponding mappings are assumed to be physically contiguous.
1135 *
1136 * The L1 TLB can store a single PTE for the whole fragment,
1137 * significantly increasing the space available for translation
1138 * caching. This leads to large improvements in throughput when the
1139 * TLB is under pressure.
1140 *
1141 * The L2 TLB distributes small and large fragments into two
1142 * asymmetric partitions. The large fragment cache is significantly
1143 * larger. Thus, we try to use large fragments wherever possible.
1144 * Userspace can support this by aligning virtual base address and
1145 * allocation size to the fragment size.
1146 */
6849d47c
RH
1147 unsigned max_frag = params->adev->vm_manager.fragment_size;
1148 int r;
92696dd5
CK
1149
1150 /* system pages are not contiguous */
6849d47c 1151 if (params->src || !(flags & AMDGPU_PTE_VALID))
cc28c4ed 1152 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
92696dd5 1153
6849d47c
RH
1154 while (start != end) {
1155 uint64_t frag_flags, frag_end;
1156 unsigned frag;
1157
1158 /* This intentionally wraps around if no bit is set */
1159 frag = min((unsigned)ffs(start) - 1,
1160 (unsigned)fls64(end - start) - 1);
1161 if (frag >= max_frag) {
1162 frag_flags = AMDGPU_PTE_FRAG(max_frag);
1163 frag_end = end & ~((1ULL << max_frag) - 1);
1164 } else {
1165 frag_flags = AMDGPU_PTE_FRAG(frag);
1166 frag_end = start + (1 << frag);
1167 }
1168
1169 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1170 flags | frag_flags);
cc28c4ed
HK
1171 if (r)
1172 return r;
92696dd5 1173
6849d47c
RH
1174 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1175 start = frag_end;
92696dd5 1176 }
6849d47c
RH
1177
1178 return 0;
d38ceaf9
AD
1179}
1180
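
The fragment selection above can be replayed in user space. The sketch below is not driver code; ffs()/fls64() are replaced with GCC builtins, and max_frag stands in for adev->vm_manager.fragment_size (4, i.e. 64 KiB fragments, is assumed here). Each loop iteration picks the largest power-of-two run that is aligned at start and still fits before end.

#include <stdio.h>
#include <stdint.h>

static void walk_frags(uint64_t start, uint64_t end, unsigned max_frag)
{
	while (start != end) {
		unsigned align = start ? __builtin_ctzll(start) : 63;	/* ffs(start) - 1 */
		unsigned fit   = 63 - __builtin_clzll(end - start);	/* fls64(len) - 1 */
		unsigned frag  = align < fit ? align : fit;
		uint64_t frag_end;

		if (frag >= max_frag)			/* clamp to the largest HW fragment */
			frag_end = end & ~((1ULL << max_frag) - 1);
		else
			frag_end = start + (1ULL << frag);

		printf("PTEs %llu..%llu get FRAG(%u)\n",
		       (unsigned long long)start,
		       (unsigned long long)(frag_end - 1),
		       frag < max_frag ? frag : max_frag);
		start = frag_end;
	}
}

int main(void)
{
	walk_frags(3, 1035, 4);		/* unaligned head, large middle, small tail */
	return 0;
}
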
d38ceaf9
AD
1181/**
1182 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1183 *
1184 * @adev: amdgpu_device pointer
3cabaa54 1185 * @exclusive: fence we need to sync to
fa3ab3c7 1186 * @pages_addr: DMA addresses to use for mapping
d38ceaf9 1187 * @vm: requested vm
a14faa65
CK
1188 * @start: start of mapped range
1189 * @last: last mapped entry
1190 * @flags: flags for the entries
d38ceaf9 1191 * @addr: addr to set the area to
d38ceaf9
AD
1192 * @fence: optional resulting fence
1193 *
a14faa65 1194 * Fill in the page table entries between @start and @last.
d38ceaf9 1195 * Returns 0 for success, -EINVAL for failure.
d38ceaf9
AD
1196 */
1197static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
f54d1867 1198 struct dma_fence *exclusive,
fa3ab3c7 1199 dma_addr_t *pages_addr,
d38ceaf9 1200 struct amdgpu_vm *vm,
a14faa65 1201 uint64_t start, uint64_t last,
6b777607 1202 uint64_t flags, uint64_t addr,
f54d1867 1203 struct dma_fence **fence)
d38ceaf9 1204{
2d55e45a 1205 struct amdgpu_ring *ring;
a1e08d3b 1206 void *owner = AMDGPU_FENCE_OWNER_VM;
d38ceaf9 1207 unsigned nptes, ncmds, ndw;
d71518b5 1208 struct amdgpu_job *job;
29efc4f5 1209 struct amdgpu_pte_update_params params;
f54d1867 1210 struct dma_fence *f = NULL;
d38ceaf9
AD
1211 int r;
1212
afef8b8f
CK
1213 memset(&params, 0, sizeof(params));
1214 params.adev = adev;
49ac8a24 1215 params.vm = vm;
afef8b8f 1216
a33cab7a
CK
1217 /* sync to everything on unmapping */
1218 if (!(flags & AMDGPU_PTE_VALID))
1219 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1220
b4d42511
HK
1221 if (vm->use_cpu_for_update) {
1222 /* params.src is used as flag to indicate system Memory */
1223 if (pages_addr)
1224 params.src = ~0;
1225
1226 /* Wait for PT BOs to be free. PTs share the same resv. object
1227 * as the root PD BO
1228 */
a33cab7a 1229 r = amdgpu_vm_wait_pd(adev, vm, owner);
b4d42511
HK
1230 if (unlikely(r))
1231 return r;
1232
1233 params.func = amdgpu_vm_cpu_set_ptes;
1234 params.pages_addr = pages_addr;
b4d42511
HK
1235 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1236 addr, flags);
1237 }
1238
2d55e45a 1239 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
27c5f36f 1240
a14faa65 1241 nptes = last - start + 1;
d38ceaf9
AD
1242
1243 /*
86209523 1244 * reserve space for two commands every (1 << BLOCK_SIZE)
d38ceaf9 1245 * entries or 2k dwords (whatever is smaller)
86209523
BN
1246 *
1247 * The second command is for the shadow pagetables.
d38ceaf9 1248 */
104bd2ca
ED
1249 if (vm->root.base.bo->shadow)
1250 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1251 else
1252 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
d38ceaf9
AD
1253
1254 /* padding, etc. */
1255 ndw = 64;
1256
570144c6 1257 if (pages_addr) {
b0456f93 1258 /* copy commands needed */
e6d92197 1259 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
d38ceaf9 1260
b0456f93 1261 /* and also PTEs */
d38ceaf9
AD
1262 ndw += nptes * 2;
1263
afef8b8f
CK
1264 params.func = amdgpu_vm_do_copy_ptes;
1265
d38ceaf9
AD
1266 } else {
1267 /* set page commands needed */
44e1baeb 1268 ndw += ncmds * 10;
d38ceaf9 1269
6849d47c 1270 /* extra commands for begin/end fragments */
44e1baeb 1271 ndw += 2 * 10 * adev->vm_manager.fragment_size;
afef8b8f
CK
1272
1273 params.func = amdgpu_vm_do_set_ptes;
d38ceaf9
AD
1274 }
1275
d71518b5
CK
1276 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1277 if (r)
d38ceaf9 1278 return r;
d71518b5 1279
29efc4f5 1280 params.ib = &job->ibs[0];
d5fc5e82 1281
570144c6 1282 if (pages_addr) {
b0456f93
CK
1283 uint64_t *pte;
1284 unsigned i;
1285
1286 /* Put the PTEs at the end of the IB. */
1287 i = ndw - nptes * 2;
1288 pte= (uint64_t *)&(job->ibs->ptr[i]);
1289 params.src = job->ibs->gpu_addr + i * 4;
1290
1291 for (i = 0; i < nptes; ++i) {
1292 pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1293 AMDGPU_GPU_PAGE_SIZE);
1294 pte[i] |= flags;
1295 }
d7a4ac66 1296 addr = 0;
b0456f93
CK
1297 }
1298
cebb52b7 1299 r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
3cabaa54
CK
1300 if (r)
1301 goto error_free;
1302
3f3333f8 1303 r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
177ae09b 1304 owner, false);
a1e08d3b
CK
1305 if (r)
1306 goto error_free;
d38ceaf9 1307
3f3333f8 1308 r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
a1e08d3b
CK
1309 if (r)
1310 goto error_free;
1311
cc28c4ed
HK
1312 r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1313 if (r)
1314 goto error_free;
d38ceaf9 1315
29efc4f5
CK
1316 amdgpu_ring_pad_ib(ring, params.ib);
1317 WARN_ON(params.ib->length_dw > ndw);
2bd9ccfa
CK
1318 r = amdgpu_job_submit(job, ring, &vm->entity,
1319 AMDGPU_FENCE_OWNER_VM, &f);
4af9f07c
CZ
1320 if (r)
1321 goto error_free;
d38ceaf9 1322
3f3333f8 1323 amdgpu_bo_fence(vm->root.base.bo, f, true);
284710fa
CK
1324 dma_fence_put(*fence);
1325 *fence = f;
d38ceaf9 1326 return 0;
d5fc5e82
CZ
1327
1328error_free:
d71518b5 1329 amdgpu_job_free(job);
4af9f07c 1330 return r;
d38ceaf9
AD
1331}
1332
a14faa65
CK
1333/**
1334 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1335 *
1336 * @adev: amdgpu_device pointer
3cabaa54 1337 * @exclusive: fence we need to sync to
8358dcee 1338 * @pages_addr: DMA addresses to use for mapping
a14faa65
CK
1339 * @vm: requested vm
1340 * @mapping: mapped range and flags to use for the update
8358dcee 1341 * @flags: HW flags for the mapping
63e0ba40 1342 * @nodes: array of drm_mm_nodes with the MC addresses
a14faa65
CK
1343 * @fence: optional resulting fence
1344 *
1345 * Split the mapping into smaller chunks so that each update fits
1346 * into a SDMA IB.
1347 * Returns 0 for success, -EINVAL for failure.
1348 */
1349static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
f54d1867 1350 struct dma_fence *exclusive,
8358dcee 1351 dma_addr_t *pages_addr,
a14faa65
CK
1352 struct amdgpu_vm *vm,
1353 struct amdgpu_bo_va_mapping *mapping,
6b777607 1354 uint64_t flags,
63e0ba40 1355 struct drm_mm_node *nodes,
f54d1867 1356 struct dma_fence **fence)
a14faa65 1357{
9fc8fc70 1358 unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
570144c6 1359 uint64_t pfn, start = mapping->start;
a14faa65
CK
1360 int r;
1361
1362 /* normally bo_va->flags only contains the READABLE and WRITEABLE bits,
1363 * but just in case we filter the flags here first
1364 */
1365 if (!(mapping->flags & AMDGPU_PTE_READABLE))
1366 flags &= ~AMDGPU_PTE_READABLE;
1367 if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1368 flags &= ~AMDGPU_PTE_WRITEABLE;
1369
15b31c59
AX
1370 flags &= ~AMDGPU_PTE_EXECUTABLE;
1371 flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1372
b0fd18b0
AX
1373 flags &= ~AMDGPU_PTE_MTYPE_MASK;
1374 flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1375
d0766e98
ZJ
1376 if ((mapping->flags & AMDGPU_PTE_PRT) &&
1377 (adev->asic_type >= CHIP_VEGA10)) {
1378 flags |= AMDGPU_PTE_PRT;
1379 flags &= ~AMDGPU_PTE_VALID;
1380 }
1381
a14faa65
CK
1382 trace_amdgpu_vm_bo_update(mapping);
1383
63e0ba40
CK
1384 pfn = mapping->offset >> PAGE_SHIFT;
1385 if (nodes) {
1386 while (pfn >= nodes->size) {
1387 pfn -= nodes->size;
1388 ++nodes;
1389 }
fa3ab3c7 1390 }
a14faa65 1391
63e0ba40 1392 do {
9fc8fc70 1393 dma_addr_t *dma_addr = NULL;
63e0ba40
CK
1394 uint64_t max_entries;
1395 uint64_t addr, last;
a14faa65 1396
63e0ba40
CK
1397 if (nodes) {
1398 addr = nodes->start << PAGE_SHIFT;
1399 max_entries = (nodes->size - pfn) *
1400 (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1401 } else {
1402 addr = 0;
1403 max_entries = S64_MAX;
1404 }
a14faa65 1405
63e0ba40 1406 if (pages_addr) {
9fc8fc70
CK
1407 uint64_t count;
1408
457e0fee 1409 max_entries = min(max_entries, 16ull * 1024ull);
9fc8fc70
CK
1410 for (count = 1; count < max_entries; ++count) {
1411 uint64_t idx = pfn + count;
1412
1413 if (pages_addr[idx] !=
1414 (pages_addr[idx - 1] + PAGE_SIZE))
1415 break;
1416 }
1417
1418 if (count < min_linear_pages) {
1419 addr = pfn << PAGE_SHIFT;
1420 dma_addr = pages_addr;
1421 } else {
1422 addr = pages_addr[pfn];
1423 max_entries = count;
1424 }
1425
63e0ba40
CK
1426 } else if (flags & AMDGPU_PTE_VALID) {
1427 addr += adev->vm_manager.vram_base_offset;
9fc8fc70 1428 addr += pfn << PAGE_SHIFT;
63e0ba40 1429 }
63e0ba40 1430
a9f87f64 1431 last = min((uint64_t)mapping->last, start + max_entries - 1);
9fc8fc70 1432 r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
a14faa65
CK
1433 start, last, flags, addr,
1434 fence);
1435 if (r)
1436 return r;
1437
63e0ba40
CK
1438 pfn += last - start + 1;
1439 if (nodes && nodes->size == pfn) {
1440 pfn = 0;
1441 ++nodes;
1442 }
a14faa65 1443 start = last + 1;
63e0ba40 1444
a9f87f64 1445 } while (unlikely(start != mapping->last + 1));
a14faa65
CK
1446
1447 return 0;
1448}
1449
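
A hedged sketch (not driver code) of the contiguity scan performed on pages_addr above: if enough consecutive system pages happen to be physically contiguous, the range can be mapped linearly instead of going through the per-page copy path. The 4 KiB page size and the min_linear_pages threshold of 16 are assumptions here.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL		/* assumed CPU page size */

/* length of the physically contiguous run starting at pfn */
static uint64_t contiguous_run(const uint64_t *pages_addr, uint64_t pfn,
			       uint64_t max_entries)
{
	uint64_t count;

	for (count = 1; count < max_entries; ++count)
		if (pages_addr[pfn + count] !=
		    pages_addr[pfn + count - 1] + PAGE_SIZE)
			break;
	return count;
}

int main(void)
{
	const uint64_t min_linear_pages = 16;	/* assumed 1 << fragment_size */
	/* three contiguous pages followed by a discontiguous one */
	uint64_t pages_addr[4] = { 0x10000000, 0x10001000, 0x10002000, 0x20000000 };
	uint64_t run = contiguous_run(pages_addr, 0, 4);

	printf("%llu contiguous pages -> %s\n", (unsigned long long)run,
	       run < min_linear_pages ? "per-page copy path" : "linear mapping");
	return 0;
}
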
d38ceaf9
AD
1450/**
1451 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1452 *
1453 * @adev: amdgpu_device pointer
1454 * @bo_va: requested BO and VM object
99e124f4 1455 * @clear: if true clear the entries
d38ceaf9
AD
1456 *
1457 * Fill in the page table entries for @bo_va.
1458 * Returns 0 for success, -EINVAL for failure.
d38ceaf9
AD
1459 */
1460int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1461 struct amdgpu_bo_va *bo_va,
99e124f4 1462 bool clear)
d38ceaf9 1463{
ec681545
CK
1464 struct amdgpu_bo *bo = bo_va->base.bo;
1465 struct amdgpu_vm *vm = bo_va->base.vm;
d38ceaf9 1466 struct amdgpu_bo_va_mapping *mapping;
8358dcee 1467 dma_addr_t *pages_addr = NULL;
99e124f4 1468 struct ttm_mem_reg *mem;
63e0ba40 1469 struct drm_mm_node *nodes;
4e55eb38 1470 struct dma_fence *exclusive, **last_update;
457e0fee 1471 uint64_t flags;
d38ceaf9
AD
1472 int r;
1473
ec681545 1474 if (clear || !bo_va->base.bo) {
99e124f4 1475 mem = NULL;
63e0ba40 1476 nodes = NULL;
99e124f4
CK
1477 exclusive = NULL;
1478 } else {
8358dcee
CK
1479 struct ttm_dma_tt *ttm;
1480
ec681545 1481 mem = &bo_va->base.bo->tbo.mem;
63e0ba40
CK
1482 nodes = mem->mm_node;
1483 if (mem->mem_type == TTM_PL_TT) {
ec681545
CK
1484 ttm = container_of(bo_va->base.bo->tbo.ttm,
1485 struct ttm_dma_tt, ttm);
8358dcee 1486 pages_addr = ttm->dma_address;
9ab21462 1487 }
ec681545 1488 exclusive = reservation_object_get_excl(bo->tbo.resv);
d38ceaf9
AD
1489 }
1490
457e0fee 1491 if (bo)
ec681545 1492 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
457e0fee 1493 else
a5f6b5b1 1494 flags = 0x0;
d38ceaf9 1495
4e55eb38
CK
1496 if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1497 last_update = &vm->last_update;
1498 else
1499 last_update = &bo_va->last_pt_update;
1500
3d7d4d3a
CK
1501 if (!clear && bo_va->base.moved) {
1502 bo_va->base.moved = false;
7fc11959 1503 list_splice_init(&bo_va->valids, &bo_va->invalids);
3d7d4d3a 1504
cb7b6ec2
CK
1505 } else if (bo_va->cleared != clear) {
1506 list_splice_init(&bo_va->valids, &bo_va->invalids);
3d7d4d3a 1507 }
7fc11959
CK
1508
1509 list_for_each_entry(mapping, &bo_va->invalids, list) {
457e0fee 1510 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
63e0ba40 1511 mapping, flags, nodes,
4e55eb38 1512 last_update);
d38ceaf9
AD
1513 if (r)
1514 return r;
1515 }
1516
cb7b6ec2
CK
1517 if (vm->use_cpu_for_update) {
1518 /* Flush HDP */
1519 mb();
69882565 1520 amdgpu_asic_flush_hdp(adev, NULL);
d6c10f6b
CK
1521 }
1522
d38ceaf9 1523 spin_lock(&vm->status_lock);
ec681545 1524 list_del_init(&bo_va->base.vm_status);
d38ceaf9
AD
1525 spin_unlock(&vm->status_lock);
1526
cb7b6ec2
CK
1527 list_splice_init(&bo_va->invalids, &bo_va->valids);
1528 bo_va->cleared = clear;
1529
1530 if (trace_amdgpu_vm_bo_mapping_enabled()) {
1531 list_for_each_entry(mapping, &bo_va->valids, list)
1532 trace_amdgpu_vm_bo_mapping(mapping);
68c62306
CK
1533 }
1534
d38ceaf9
AD
1535 return 0;
1536}
1537
284710fa
CK
1538/**
1539 * amdgpu_vm_update_prt_state - update the global PRT state
1540 */
1541static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1542{
1543 unsigned long flags;
1544 bool enable;
1545
1546 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
451bc8eb 1547 enable = !!atomic_read(&adev->vm_manager.num_prt_users);
132f34e4 1548 adev->gmc.gmc_funcs->set_prt(adev, enable);
284710fa
CK
1549 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1550}
1551
451bc8eb 1552/**
4388fc2a 1553 * amdgpu_vm_prt_get - add a PRT user
451bc8eb
CK
1554 */
1555static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1556{
132f34e4 1557 if (!adev->gmc.gmc_funcs->set_prt)
4388fc2a
CK
1558 return;
1559
451bc8eb
CK
1560 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1561 amdgpu_vm_update_prt_state(adev);
1562}
1563
0b15f2fc
CK
1564/**
1565 * amdgpu_vm_prt_put - drop a PRT user
1566 */
1567static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1568{
451bc8eb 1569 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
0b15f2fc
CK
1570 amdgpu_vm_update_prt_state(adev);
1571}
1572
284710fa 1573/**
451bc8eb 1574 * amdgpu_vm_prt_cb - callback for updating the PRT status
284710fa
CK
1575 */
1576static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1577{
1578 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1579
0b15f2fc 1580 amdgpu_vm_prt_put(cb->adev);
284710fa
CK
1581 kfree(cb);
1582}
1583
451bc8eb
CK
1584/**
1585 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1586 */
1587static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1588 struct dma_fence *fence)
1589{
4388fc2a 1590 struct amdgpu_prt_cb *cb;
451bc8eb 1591
132f34e4 1592 if (!adev->gmc.gmc_funcs->set_prt)
4388fc2a
CK
1593 return;
1594
1595 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
451bc8eb
CK
1596 if (!cb) {
1597 /* Last resort when we are OOM */
1598 if (fence)
1599 dma_fence_wait(fence, false);
1600
486a68f5 1601 amdgpu_vm_prt_put(adev);
451bc8eb
CK
1602 } else {
1603 cb->adev = adev;
1604 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1605 amdgpu_vm_prt_cb))
1606 amdgpu_vm_prt_cb(fence, &cb->cb);
1607 }
1608}
1609
284710fa
CK
1610/**
1611 * amdgpu_vm_free_mapping - free a mapping
1612 *
1613 * @adev: amdgpu_device pointer
1614 * @vm: requested vm
1615 * @mapping: mapping to be freed
1616 * @fence: fence of the unmap operation
1617 *
1618 * Free a mapping and make sure we decrease the PRT usage count if applicable.
1619 */
1620static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1621 struct amdgpu_vm *vm,
1622 struct amdgpu_bo_va_mapping *mapping,
1623 struct dma_fence *fence)
1624{
451bc8eb
CK
1625 if (mapping->flags & AMDGPU_PTE_PRT)
1626 amdgpu_vm_add_prt_cb(adev, fence);
1627 kfree(mapping);
1628}
284710fa 1629
451bc8eb
CK
1630/**
1631 * amdgpu_vm_prt_fini - finish all prt mappings
1632 *
1633 * @adev: amdgpu_device pointer
1634 * @vm: requested vm
1635 *
1636 * Register a cleanup callback to disable PRT support after VM dies.
1637 */
1638static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1639{
3f3333f8 1640 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
451bc8eb
CK
1641 struct dma_fence *excl, **shared;
1642 unsigned i, shared_count;
1643 int r;
0b15f2fc 1644
451bc8eb
CK
1645 r = reservation_object_get_fences_rcu(resv, &excl,
1646 &shared_count, &shared);
1647 if (r) {
1648 /* Not enough memory to grab the fence list, as last resort
1649 * block for all the fences to complete.
1650 */
1651 reservation_object_wait_timeout_rcu(resv, true, false,
1652 MAX_SCHEDULE_TIMEOUT);
1653 return;
284710fa 1654 }
451bc8eb
CK
1655
1656 /* Add a callback for each fence in the reservation object */
1657 amdgpu_vm_prt_get(adev);
1658 amdgpu_vm_add_prt_cb(adev, excl);
1659
1660 for (i = 0; i < shared_count; ++i) {
1661 amdgpu_vm_prt_get(adev);
1662 amdgpu_vm_add_prt_cb(adev, shared[i]);
1663 }
1664
1665 kfree(shared);
284710fa
CK
1666}
1667
d38ceaf9
AD
1668/**
1669 * amdgpu_vm_clear_freed - clear freed BOs in the PT
1670 *
1671 * @adev: amdgpu_device pointer
1672 * @vm: requested vm
f3467818
NH
1673 * @fence: optional resulting fence (unchanged if no work needed to be done
1674 * or if an error occurred)
d38ceaf9
AD
1675 *
1676 * Make sure all freed BOs are cleared in the PT.
1677 * Returns 0 for success.
1678 *
1679 * PTs have to be reserved and mutex must be locked!
1680 */
1681int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
f3467818
NH
1682 struct amdgpu_vm *vm,
1683 struct dma_fence **fence)
d38ceaf9
AD
1684{
1685 struct amdgpu_bo_va_mapping *mapping;
4584312d 1686 uint64_t init_pte_value = 0;
f3467818 1687 struct dma_fence *f = NULL;
d38ceaf9
AD
1688 int r;
1689
1690 while (!list_empty(&vm->freed)) {
1691 mapping = list_first_entry(&vm->freed,
1692 struct amdgpu_bo_va_mapping, list);
1693 list_del(&mapping->list);
e17841b9 1694
4584312d 1695 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
6d16dac8 1696 init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
51ac7eec 1697
570144c6 1698 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
fc6aa33d 1699 mapping->start, mapping->last,
51ac7eec 1700 init_pte_value, 0, &f);
f3467818 1701 amdgpu_vm_free_mapping(adev, vm, mapping, f);
284710fa 1702 if (r) {
f3467818 1703 dma_fence_put(f);
d38ceaf9 1704 return r;
284710fa 1705 }
f3467818 1706 }
d38ceaf9 1707
f3467818
NH
1708 if (fence && f) {
1709 dma_fence_put(*fence);
1710 *fence = f;
1711 } else {
1712 dma_fence_put(f);
d38ceaf9 1713 }
f3467818 1714
d38ceaf9
AD
1715 return 0;
1716
1717}
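/*
 * Usage sketch (illustrative, not part of the original file): a caller that
 * already holds the PT reservations can flush the freed list after unmapping
 * and optionally wait for the resulting page table update:
 *
 *	struct dma_fence *fence = NULL;
 *	int r = amdgpu_vm_clear_freed(adev, vm, &fence);
 *
 *	if (!r && fence) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */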
1718
1719/**
73fb16e7 1720 * amdgpu_vm_handle_moved - handle moved BOs in the PT
d38ceaf9
AD
1721 *
1722 * @adev: amdgpu_device pointer
1723 * @vm: requested vm
d38ceaf9 1725 *
73fb16e7 1726 * Make sure all BOs which are moved are updated in the PTs.
d38ceaf9
AD
1727 * Returns 0 for success.
1728 *
73fb16e7 1729 * PTs have to be reserved!
d38ceaf9 1730 */
73fb16e7 1731int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
4e55eb38 1732 struct amdgpu_vm *vm)
d38ceaf9 1733{
73fb16e7 1734 bool clear;
91e1a520 1735 int r = 0;
d38ceaf9
AD
1736
1737 spin_lock(&vm->status_lock);
27c7b9ae 1738 while (!list_empty(&vm->moved)) {
4e55eb38 1739 struct amdgpu_bo_va *bo_va;
ec363e0d 1740 struct reservation_object *resv;
4e55eb38 1741
27c7b9ae 1742 bo_va = list_first_entry(&vm->moved,
ec681545 1743 struct amdgpu_bo_va, base.vm_status);
d38ceaf9 1744 spin_unlock(&vm->status_lock);
32b41ac2 1745
ec363e0d
CK
1746 resv = bo_va->base.bo->tbo.resv;
1747
73fb16e7 1748 /* Per VM BOs never need to be cleared in the page tables */
ec363e0d
CK
1749 if (resv == vm->root.base.bo->tbo.resv)
1750 clear = false;
1751 /* Try to reserve the BO to avoid clearing its ptes */
9b8cad20 1752 else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
ec363e0d
CK
1753 clear = false;
1754 /* Somebody else is using the BO right now */
1755 else
1756 clear = true;
73fb16e7
CK
1757
1758 r = amdgpu_vm_bo_update(adev, bo_va, clear);
d38ceaf9
AD
1759 if (r)
1760 return r;
1761
ec363e0d
CK
1762 if (!clear && resv != vm->root.base.bo->tbo.resv)
1763 reservation_object_unlock(resv);
1764
d38ceaf9
AD
1765 spin_lock(&vm->status_lock);
1766 }
1767 spin_unlock(&vm->status_lock);
1768
91e1a520 1769 return r;
d38ceaf9
AD
1770}
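/*
 * Usage sketch (illustrative): during command submission, with the BOs and
 * PTs reserved, the freed and moved lists are typically flushed back to back
 * so every PTE is valid before the job runs:
 *
 *	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 *	if (!r)
 *		r = amdgpu_vm_handle_moved(adev, vm);
 */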
1771
1772/**
1773 * amdgpu_vm_bo_add - add a bo to a specific vm
1774 *
1775 * @adev: amdgpu_device pointer
1776 * @vm: requested vm
1777 * @bo: amdgpu buffer object
1778 *
8843dbbb 1779 * Add @bo into the requested vm.
d38ceaf9
AD
1780 * Add @bo to the list of bos associated with the vm
1781 * Returns newly added bo_va or NULL for failure
1782 *
1783 * Object has to be reserved!
1784 */
1785struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1786 struct amdgpu_vm *vm,
1787 struct amdgpu_bo *bo)
1788{
1789 struct amdgpu_bo_va *bo_va;
1790
1791 bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1792 if (bo_va == NULL) {
1793 return NULL;
1794 }
ec681545
CK
1795 bo_va->base.vm = vm;
1796 bo_va->base.bo = bo;
1797 INIT_LIST_HEAD(&bo_va->base.bo_list);
1798 INIT_LIST_HEAD(&bo_va->base.vm_status);
1799
d38ceaf9 1800 bo_va->ref_count = 1;
7fc11959
CK
1801 INIT_LIST_HEAD(&bo_va->valids);
1802 INIT_LIST_HEAD(&bo_va->invalids);
32b41ac2 1803
727ffdf2
CK
1804 if (!bo)
1805 return bo_va;
1806
1807 list_add_tail(&bo_va->base.bo_list, &bo->va);
1808
1809 if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
1810 return bo_va;
1811
1812 if (bo->preferred_domains &
1813 amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
1814 return bo_va;
1815
1816 /*
1817 * We checked all the prerequisites, but it looks like this per VM BO
1818 * is currently evicted. Add the BO to the evicted list to make sure it
1819 * is validated on the next VM use to avoid a fault.
1820 */
1821 spin_lock(&vm->status_lock);
1822 list_move_tail(&bo_va->base.vm_status, &vm->evicted);
1823 spin_unlock(&vm->status_lock);
d38ceaf9
AD
1824
1825 return bo_va;
1826}
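/*
 * Usage sketch (illustrative): with @bo reserved, the per-VM bookkeeping
 * object is created once before any mapping can be made:
 *
 *	struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *
 *	if (!bo_va)
 *		return -ENOMEM;
 */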
1827
73fb16e7
CK
1828
1829/**
1830 * amdgpu_vm_bo_insert_map - insert a new mapping
1831 *
1832 * @adev: amdgpu_device pointer
1833 * @bo_va: bo_va to store the address
1834 * @mapping: the mapping to insert
1835 *
1836 * Insert a new mapping into all structures.
1837 */
1838static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1839 struct amdgpu_bo_va *bo_va,
1840 struct amdgpu_bo_va_mapping *mapping)
1841{
1842 struct amdgpu_vm *vm = bo_va->base.vm;
1843 struct amdgpu_bo *bo = bo_va->base.bo;
1844
aebc5e6f 1845 mapping->bo_va = bo_va;
73fb16e7
CK
1846 list_add(&mapping->list, &bo_va->invalids);
1847 amdgpu_vm_it_insert(mapping, &vm->va);
1848
1849 if (mapping->flags & AMDGPU_PTE_PRT)
1850 amdgpu_vm_prt_get(adev);
1851
1852 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1853 spin_lock(&vm->status_lock);
481c2e94
CK
1854 if (list_empty(&bo_va->base.vm_status))
1855 list_add(&bo_va->base.vm_status, &vm->moved);
73fb16e7
CK
1856 spin_unlock(&vm->status_lock);
1857 }
1858 trace_amdgpu_vm_bo_map(bo_va, mapping);
1859}
1860
d38ceaf9
AD
1861/**
1862 * amdgpu_vm_bo_map - map bo inside a vm
1863 *
1864 * @adev: amdgpu_device pointer
1865 * @bo_va: bo_va to store the address
1866 * @saddr: where to map the BO
1867 * @offset: requested offset in the BO
 * @size: size of the mapping, in bytes
1868 * @flags: attributes of pages (read/write/valid/etc.)
1869 *
1870 * Add a mapping of the BO at the specified addr into the VM.
1871 * Returns 0 for success, error for failure.
1872 *
49b02b18 1873 * Object has to be reserved and unreserved outside!
d38ceaf9
AD
1874 */
1875int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1876 struct amdgpu_bo_va *bo_va,
1877 uint64_t saddr, uint64_t offset,
268c3001 1878 uint64_t size, uint64_t flags)
d38ceaf9 1879{
a9f87f64 1880 struct amdgpu_bo_va_mapping *mapping, *tmp;
ec681545
CK
1881 struct amdgpu_bo *bo = bo_va->base.bo;
1882 struct amdgpu_vm *vm = bo_va->base.vm;
d38ceaf9 1883 uint64_t eaddr;
d38ceaf9 1884
0be52de9
CK
1885 /* validate the parameters */
1886 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
49b02b18 1887 size == 0 || size & AMDGPU_GPU_PAGE_MASK)
0be52de9 1888 return -EINVAL;
0be52de9 1889
d38ceaf9 1890 /* make sure object fit at this offset */
005ae95e 1891 eaddr = saddr + size - 1;
a5f6b5b1 1892 if (saddr >= eaddr ||
ec681545 1893 (bo && offset + size > amdgpu_bo_size(bo)))
d38ceaf9 1894 return -EINVAL;
d38ceaf9 1895
d38ceaf9
AD
1896 saddr /= AMDGPU_GPU_PAGE_SIZE;
1897 eaddr /= AMDGPU_GPU_PAGE_SIZE;
1898
a9f87f64
CK
1899 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1900 if (tmp) {
d38ceaf9
AD
1901 /* bo and tmp overlap, invalid addr */
1902 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
ec681545 1903 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
a9f87f64 1904 tmp->start, tmp->last + 1);
663e4577 1905 return -EINVAL;
d38ceaf9
AD
1906 }
1907
1908 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
663e4577
CK
1909 if (!mapping)
1910 return -ENOMEM;
d38ceaf9 1911
a9f87f64
CK
1912 mapping->start = saddr;
1913 mapping->last = eaddr;
d38ceaf9
AD
1914 mapping->offset = offset;
1915 mapping->flags = flags;
1916
73fb16e7 1917 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
80f95c57
CK
1918
1919 return 0;
1920}
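/*
 * Usage sketch (illustrative): @saddr, @offset and @size must all be
 * multiples of AMDGPU_GPU_PAGE_SIZE or the call fails with -EINVAL. The PTE
 * flags below are assumed to be the usual read/write bits from the amdgpu
 * headers:
 *
 *	r = amdgpu_vm_bo_map(adev, bo_va, saddr, 0, amdgpu_bo_size(bo),
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 */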
1921
1922/**
1923 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1924 *
1925 * @adev: amdgpu_device pointer
1926 * @bo_va: bo_va to store the address
1927 * @saddr: where to map the BO
1928 * @offset: requested offset in the BO
 * @size: size of the mapping, in bytes
1929 * @flags: attributes of pages (read/write/valid/etc.)
1930 *
1931 * Add a mapping of the BO at the specified addr into the VM. Replace existing
1932 * mappings as we do so.
1933 * Returns 0 for success, error for failure.
1934 *
1935 * Object has to be reserved and unreserved outside!
1936 */
1937int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1938 struct amdgpu_bo_va *bo_va,
1939 uint64_t saddr, uint64_t offset,
1940 uint64_t size, uint64_t flags)
1941{
1942 struct amdgpu_bo_va_mapping *mapping;
ec681545 1943 struct amdgpu_bo *bo = bo_va->base.bo;
80f95c57
CK
1944 uint64_t eaddr;
1945 int r;
1946
1947 /* validate the parameters */
1948 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1949 size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1950 return -EINVAL;
1951
1952 /* make sure object fit at this offset */
1953 eaddr = saddr + size - 1;
1954 if (saddr >= eaddr ||
ec681545 1955 (bo && offset + size > amdgpu_bo_size(bo)))
80f95c57
CK
1956 return -EINVAL;
1957
1958 /* Allocate all the needed memory */
1959 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1960 if (!mapping)
1961 return -ENOMEM;
1962
ec681545 1963 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
80f95c57
CK
1964 if (r) {
1965 kfree(mapping);
1966 return r;
1967 }
1968
1969 saddr /= AMDGPU_GPU_PAGE_SIZE;
1970 eaddr /= AMDGPU_GPU_PAGE_SIZE;
1971
a9f87f64
CK
1972 mapping->start = saddr;
1973 mapping->last = eaddr;
80f95c57
CK
1974 mapping->offset = offset;
1975 mapping->flags = flags;
1976
73fb16e7 1977 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
4388fc2a 1978
d38ceaf9 1979 return 0;
d38ceaf9
AD
1980}
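/*
 * Usage sketch (illustrative): unlike amdgpu_vm_bo_map(), an overlap is not
 * an error here; existing mappings in the range are first cut down via
 * amdgpu_vm_bo_clear_mappings() and only then is the new one inserted
 * (AMDGPU_PTE_READABLE is assumed to be a valid flag from the amdgpu
 * headers):
 *
 *	r = amdgpu_vm_bo_replace_map(adev, bo_va, saddr, 0, size,
 *				     AMDGPU_PTE_READABLE);
 */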
1981
1982/**
1983 * amdgpu_vm_bo_unmap - remove bo mapping from vm
1984 *
1985 * @adev: amdgpu_device pointer
1986 * @bo_va: bo_va to remove the address from
1987 * @saddr: where the BO is mapped
1988 *
1989 * Remove a mapping of the BO at the specified addr from the VM.
1990 * Returns 0 for success, error for failure.
1991 *
49b02b18 1992 * Object has to be reserved and unreserved outside!
d38ceaf9
AD
1993 */
1994int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1995 struct amdgpu_bo_va *bo_va,
1996 uint64_t saddr)
1997{
1998 struct amdgpu_bo_va_mapping *mapping;
ec681545 1999 struct amdgpu_vm *vm = bo_va->base.vm;
7fc11959 2000 bool valid = true;
d38ceaf9 2001
6c7fc503 2002 saddr /= AMDGPU_GPU_PAGE_SIZE;
32b41ac2 2003
7fc11959 2004 list_for_each_entry(mapping, &bo_va->valids, list) {
a9f87f64 2005 if (mapping->start == saddr)
d38ceaf9
AD
2006 break;
2007 }
2008
7fc11959
CK
2009 if (&mapping->list == &bo_va->valids) {
2010 valid = false;
2011
2012 list_for_each_entry(mapping, &bo_va->invalids, list) {
a9f87f64 2013 if (mapping->start == saddr)
7fc11959
CK
2014 break;
2015 }
2016
32b41ac2 2017 if (&mapping->list == &bo_va->invalids)
7fc11959 2018 return -ENOENT;
d38ceaf9 2019 }
32b41ac2 2020
d38ceaf9 2021 list_del(&mapping->list);
a9f87f64 2022 amdgpu_vm_it_remove(mapping, &vm->va);
aebc5e6f 2023 mapping->bo_va = NULL;
93e3e438 2024 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
d38ceaf9 2025
e17841b9 2026 if (valid)
d38ceaf9 2027 list_add(&mapping->list, &vm->freed);
e17841b9 2028 else
284710fa
CK
2029 amdgpu_vm_free_mapping(adev, vm, mapping,
2030 bo_va->last_pt_update);
d38ceaf9
AD
2031
2032 return 0;
2033}
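/*
 * Usage sketch (illustrative): only an exact mapping start address can be
 * removed; anything else returns -ENOENT:
 *
 *	r = amdgpu_vm_bo_unmap(adev, bo_va, saddr);
 *	if (r)
 *		return r;
 */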
2034
dc54d3d1
CK
2035/**
2036 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2037 *
2038 * @adev: amdgpu_device pointer
2039 * @vm: VM structure to use
2040 * @saddr: start of the range
2041 * @size: size of the range
2042 *
2043 * Remove all mappings in a range, split them as appropriate.
2044 * Returns 0 for success, error for failure.
2045 */
2046int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2047 struct amdgpu_vm *vm,
2048 uint64_t saddr, uint64_t size)
2049{
2050 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
dc54d3d1
CK
2051 LIST_HEAD(removed);
2052 uint64_t eaddr;
2053
2054 eaddr = saddr + size - 1;
2055 saddr /= AMDGPU_GPU_PAGE_SIZE;
2056 eaddr /= AMDGPU_GPU_PAGE_SIZE;
2057
2058 /* Allocate all the needed memory */
2059 before = kzalloc(sizeof(*before), GFP_KERNEL);
2060 if (!before)
2061 return -ENOMEM;
27f6d610 2062 INIT_LIST_HEAD(&before->list);
dc54d3d1
CK
2063
2064 after = kzalloc(sizeof(*after), GFP_KERNEL);
2065 if (!after) {
2066 kfree(before);
2067 return -ENOMEM;
2068 }
27f6d610 2069 INIT_LIST_HEAD(&after->list);
dc54d3d1
CK
2070
2071 /* Now gather all removed mappings */
a9f87f64
CK
2072 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2073 while (tmp) {
dc54d3d1 2074 /* Remember mapping split at the start */
a9f87f64
CK
2075 if (tmp->start < saddr) {
2076 before->start = tmp->start;
2077 before->last = saddr - 1;
dc54d3d1
CK
2078 before->offset = tmp->offset;
2079 before->flags = tmp->flags;
2080 list_add(&before->list, &tmp->list);
2081 }
2082
2083 /* Remember mapping split at the end */
a9f87f64
CK
2084 if (tmp->last > eaddr) {
2085 after->start = eaddr + 1;
2086 after->last = tmp->last;
dc54d3d1 2087 after->offset = tmp->offset;
a9f87f64 2088 after->offset += after->start - tmp->start;
dc54d3d1
CK
2089 after->flags = tmp->flags;
2090 list_add(&after->list, &tmp->list);
2091 }
2092
2093 list_del(&tmp->list);
2094 list_add(&tmp->list, &removed);
a9f87f64
CK
2095
2096 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
dc54d3d1
CK
2097 }
2098
2099 /* And free them up */
2100 list_for_each_entry_safe(tmp, next, &removed, list) {
a9f87f64 2101 amdgpu_vm_it_remove(tmp, &vm->va);
dc54d3d1
CK
2102 list_del(&tmp->list);
2103
a9f87f64
CK
2104 if (tmp->start < saddr)
2105 tmp->start = saddr;
2106 if (tmp->last > eaddr)
2107 tmp->last = eaddr;
dc54d3d1 2108
aebc5e6f 2109 tmp->bo_va = NULL;
dc54d3d1
CK
2110 list_add(&tmp->list, &vm->freed);
2111 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2112 }
2113
27f6d610
JZ
2114 /* Insert partial mapping before the range */
2115 if (!list_empty(&before->list)) {
a9f87f64 2116 amdgpu_vm_it_insert(before, &vm->va);
dc54d3d1
CK
2117 if (before->flags & AMDGPU_PTE_PRT)
2118 amdgpu_vm_prt_get(adev);
2119 } else {
2120 kfree(before);
2121 }
2122
2123 /* Insert partial mapping after the range */
27f6d610 2124 if (!list_empty(&after->list)) {
a9f87f64 2125 amdgpu_vm_it_insert(after, &vm->va);
dc54d3d1
CK
2126 if (after->flags & AMDGPU_PTE_PRT)
2127 amdgpu_vm_prt_get(adev);
2128 } else {
2129 kfree(after);
2130 }
2131
2132 return 0;
2133}
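/*
 * Usage sketch (illustrative): punching a hole in the address space. Any
 * mapping that only partially overlaps [saddr, saddr + size) is split and
 * the overlapping piece lands on vm->freed, so a clear_freed pass usually
 * follows:
 *
 *	r = amdgpu_vm_bo_clear_mappings(adev, vm, saddr, size);
 *	if (!r)
 *		r = amdgpu_vm_clear_freed(adev, vm, NULL);
 */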
2134
aebc5e6f
CK
2135/**
2136 * amdgpu_vm_bo_lookup_mapping - find mapping by address
2137 *
2138 * @vm: the requested VM
 * @addr: the address to look up
2139 *
2140 * Find a mapping by its address.
2141 */
2142struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2143 uint64_t addr)
2144{
2145 return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2146}
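/*
 * Usage sketch (illustrative): the lookup address is in GPU page units, the
 * same units amdgpu_vm_bo_map() stores in the interval tree, so a byte
 * address is shifted first (AMDGPU_GPU_PAGE_SHIFT is assumed to match
 * AMDGPU_GPU_PAGE_SIZE):
 *
 *	struct amdgpu_bo_va_mapping *m;
 *
 *	m = amdgpu_vm_bo_lookup_mapping(vm, addr >> AMDGPU_GPU_PAGE_SHIFT);
 *	if (!m)
 *		return -EINVAL;
 */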
2147
d38ceaf9
AD
2148/**
2149 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2150 *
2151 * @adev: amdgpu_device pointer
2152 * @bo_va: requested bo_va
2153 *
8843dbbb 2154 * Remove @bo_va->bo from the requested vm.
d38ceaf9
AD
2155 *
2156 * Object has to be reserved!
2157 */
2158void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2159 struct amdgpu_bo_va *bo_va)
2160{
2161 struct amdgpu_bo_va_mapping *mapping, *next;
ec681545 2162 struct amdgpu_vm *vm = bo_va->base.vm;
d38ceaf9 2163
ec681545 2164 list_del(&bo_va->base.bo_list);
d38ceaf9 2165
d38ceaf9 2166 spin_lock(&vm->status_lock);
ec681545 2167 list_del(&bo_va->base.vm_status);
d38ceaf9
AD
2168 spin_unlock(&vm->status_lock);
2169
7fc11959 2170 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
d38ceaf9 2171 list_del(&mapping->list);
a9f87f64 2172 amdgpu_vm_it_remove(mapping, &vm->va);
aebc5e6f 2173 mapping->bo_va = NULL;
93e3e438 2174 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
7fc11959
CK
2175 list_add(&mapping->list, &vm->freed);
2176 }
2177 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2178 list_del(&mapping->list);
a9f87f64 2179 amdgpu_vm_it_remove(mapping, &vm->va);
284710fa
CK
2180 amdgpu_vm_free_mapping(adev, vm, mapping,
2181 bo_va->last_pt_update);
d38ceaf9 2182 }
32b41ac2 2183
f54d1867 2184 dma_fence_put(bo_va->last_pt_update);
d38ceaf9 2185 kfree(bo_va);
d38ceaf9
AD
2186}
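/*
 * Usage sketch (illustrative): removing a BO from a VM only queues its valid
 * mappings on vm->freed; an amdgpu_vm_clear_freed() pass is still needed
 * before the page tables forget about them:
 *
 *	amdgpu_vm_bo_rmv(adev, bo_va);
 *	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 */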
2187
2188/**
2189 * amdgpu_vm_bo_invalidate - mark the bo as invalid
2190 *
2191 * @adev: amdgpu_device pointer
2192 * @bo: amdgpu buffer object
2193 * @evicted: whether the BO is being evicted
2194 *
8843dbbb 2195 * Mark @bo as invalid.
d38ceaf9
AD
2196 */
2197void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
3f3333f8 2198 struct amdgpu_bo *bo, bool evicted)
d38ceaf9 2199{
ec681545
CK
2200 struct amdgpu_vm_bo_base *bo_base;
2201
2202 list_for_each_entry(bo_base, &bo->va, bo_list) {
3f3333f8
CK
2203 struct amdgpu_vm *vm = bo_base->vm;
2204
3d7d4d3a 2205 bo_base->moved = true;
3f3333f8
CK
2206 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2207 spin_lock(&bo_base->vm->status_lock);
73fb16e7
CK
2208 if (bo->tbo.type == ttm_bo_type_kernel)
2209 list_move(&bo_base->vm_status, &vm->evicted);
2210 else
2211 list_move_tail(&bo_base->vm_status,
2212 &vm->evicted);
3f3333f8
CK
2213 spin_unlock(&bo_base->vm->status_lock);
2214 continue;
2215 }
2216
ea09729c
CK
2217 if (bo->tbo.type == ttm_bo_type_kernel) {
2218 spin_lock(&bo_base->vm->status_lock);
2219 if (list_empty(&bo_base->vm_status))
2220 list_add(&bo_base->vm_status, &vm->relocated);
2221 spin_unlock(&bo_base->vm->status_lock);
3f3333f8 2222 continue;
ea09729c 2223 }
3f3333f8 2224
ec681545
CK
2225 spin_lock(&bo_base->vm->status_lock);
2226 if (list_empty(&bo_base->vm_status))
481c2e94 2227 list_add(&bo_base->vm_status, &vm->moved);
ec681545 2228 spin_unlock(&bo_base->vm->status_lock);
d38ceaf9
AD
2229 }
2230}
2231
bab4fee7
JZ
2232static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2233{
2234 /* Total bits covered by PD + PTs */
2235 unsigned bits = ilog2(vm_size) + 18;
2236
2237 /* Make sure the PD is 4K in size up to 8GB address space.
2238 Above that, split equally between PD and PTs */
2239 if (vm_size <= 8)
2240 return (bits - 9);
2241 else
2242 return ((bits + 3) / 2);
2243}
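/*
 * Worked example (for illustration): with vm_size = 64 GB the address space
 * covers 2^24 GPU pages, so bits = ilog2(64) + 18 = 24. Since that is above
 * the 8 GB limit the block size becomes (24 + 3) / 2 = 13, i.e. each PTB
 * covers 2^13 pages and the PD covers the remaining 11 bits.
 */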
2244
d07f14be
RH
2245/**
2246 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
bab4fee7
JZ
2247 *
2248 * @adev: amdgpu_device pointer
2249 * @vm_size: the default vm size if it's set to auto
 * @fragment_size_default: default fragment size if it's set to auto
 * @max_level: maximum number of page table levels the hardware supports
 * @max_bits: maximum size of the address space, in bits
2250 */
fdd5faaa 2251void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
f3368128
CK
2252 uint32_t fragment_size_default, unsigned max_level,
2253 unsigned max_bits)
bab4fee7 2254{
36539dce
CK
2255 uint64_t tmp;
2256
2257 /* adjust vm size first */
f3368128
CK
2258 if (amdgpu_vm_size != -1) {
2259 unsigned max_size = 1 << (max_bits - 30);
2260
fdd5faaa 2261 vm_size = amdgpu_vm_size;
f3368128
CK
2262 if (vm_size > max_size) {
2263 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2264 amdgpu_vm_size, max_size);
2265 vm_size = max_size;
2266 }
2267 }
fdd5faaa
CK
2268
2269 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
36539dce
CK
2270
2271 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
97489129
CK
2272 if (amdgpu_vm_block_size != -1)
2273 tmp >>= amdgpu_vm_block_size - 9;
36539dce
CK
2274 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2275 adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
196f7489
CZ
2276 switch (adev->vm_manager.num_level) {
2277 case 3:
2278 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2279 break;
2280 case 2:
2281 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2282 break;
2283 case 1:
2284 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2285 break;
2286 default:
2287 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2288 }
b38f41eb 2289 /* block size depends on vm size and hw setup */
97489129 2290 if (amdgpu_vm_block_size != -1)
bab4fee7 2291 adev->vm_manager.block_size =
97489129
CK
2292 min((unsigned)amdgpu_vm_block_size, max_bits
2293 - AMDGPU_GPU_PAGE_SHIFT
2294 - 9 * adev->vm_manager.num_level);
2295 else if (adev->vm_manager.num_level > 1)
2296 adev->vm_manager.block_size = 9;
bab4fee7 2297 else
97489129 2298 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
bab4fee7 2299
b38f41eb
CK
2300 if (amdgpu_vm_fragment_size == -1)
2301 adev->vm_manager.fragment_size = fragment_size_default;
2302 else
2303 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
d07f14be 2304
36539dce
CK
2305 DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2306 vm_size, adev->vm_manager.num_level + 1,
2307 adev->vm_manager.block_size,
fdd5faaa 2308 adev->vm_manager.fragment_size);
bab4fee7
JZ
2309}
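/*
 * Worked example (for illustration): with vm_size = 256 GB and both
 * amdgpu_vm_block_size and amdgpu_vm_fragment_size left at -1, max_pfn is
 * 256 << 18 = 2^26 pages; fls64(2^26) - 1 = 26 index bits gives
 * DIV_ROUND_UP(26, 9) - 1 = 2, so num_level = min(max_level, 2), the root
 * sits at AMDGPU_VM_PDB1 and the block size defaults to 9 bits.
 */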
2310
d38ceaf9
AD
2311/**
2312 * amdgpu_vm_init - initialize a vm instance
2313 *
2314 * @adev: amdgpu_device pointer
2315 * @vm: requested vm
9a4b7d4c 2316 * @vm_context: Indicates if it is a GFX or Compute context
 * @pasid: Process address space identifier, 0 if unused
d38ceaf9 2317 *
8843dbbb 2318 * Init @vm fields.
d38ceaf9 2319 */
9a4b7d4c 2320int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
02208441 2321 int vm_context, unsigned int pasid)
d38ceaf9
AD
2322{
2323 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
36b32a68 2324 AMDGPU_VM_PTE_COUNT(adev) * 8);
2d55e45a
CK
2325 unsigned ring_instance;
2326 struct amdgpu_ring *ring;
1b1f42d8 2327 struct drm_sched_rq *rq;
d3aab672 2328 unsigned long size;
13307f7e 2329 uint64_t flags;
36bbf3bf 2330 int r, i;
d38ceaf9 2331
f808c13f 2332 vm->va = RB_ROOT_CACHED;
36bbf3bf
CZ
2333 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2334 vm->reserved_vmid[i] = NULL;
d38ceaf9 2335 spin_lock_init(&vm->status_lock);
3f3333f8 2336 INIT_LIST_HEAD(&vm->evicted);
ea09729c 2337 INIT_LIST_HEAD(&vm->relocated);
27c7b9ae 2338 INIT_LIST_HEAD(&vm->moved);
d38ceaf9 2339 INIT_LIST_HEAD(&vm->freed);
20250215 2340
2bd9ccfa 2341 /* create scheduler entity for page table updates */
2d55e45a
CK
2342
2343 ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2344 ring_instance %= adev->vm_manager.vm_pte_num_rings;
2345 ring = adev->vm_manager.vm_pte_rings[ring_instance];
1b1f42d8
LS
2346 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2347 r = drm_sched_entity_init(&ring->sched, &vm->entity,
b3eebe3d 2348 rq, amdgpu_sched_jobs, NULL);
2bd9ccfa 2349 if (r)
f566ceb1 2350 return r;
2bd9ccfa 2351
51ac7eec
YZ
2352 vm->pte_support_ats = false;
2353
2354 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
9a4b7d4c
HK
2355 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2356 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
51ac7eec 2357
13307f7e 2358 if (adev->asic_type == CHIP_RAVEN)
51ac7eec 2359 vm->pte_support_ats = true;
13307f7e 2360 } else {
9a4b7d4c
HK
2361 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2362 AMDGPU_VM_USE_CPU_FOR_GFX);
13307f7e 2363 }
9a4b7d4c
HK
2364 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2365 vm->use_cpu_for_update ? "CPU" : "SDMA");
2366 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2367 "CPU update of VM recommended only for large BAR system\n");
d5884513 2368 vm->last_update = NULL;
05906dec 2369
13307f7e 2370 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
3c824172
HK
2371 if (vm->use_cpu_for_update)
2372 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2373 else
2374 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
2375 AMDGPU_GEM_CREATE_SHADOW);
2376
d3aab672
CK
2377 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2378 r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
8febe617 2379 flags, NULL, NULL, &vm->root.base.bo);
d38ceaf9 2380 if (r)
2bd9ccfa
CK
2381 goto error_free_sched_entity;
2382
d3aab672
CK
2383 r = amdgpu_bo_reserve(vm->root.base.bo, true);
2384 if (r)
2385 goto error_free_root;
2386
13307f7e 2387 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
4584312d
CK
2388 adev->vm_manager.root_level,
2389 vm->pte_support_ats);
13307f7e
CK
2390 if (r)
2391 goto error_unreserve;
2392
3f3333f8
CK
2393 vm->root.base.vm = vm;
2394 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
d3aab672
CK
2395 list_add_tail(&vm->root.base.vm_status, &vm->evicted);
2396 amdgpu_bo_unreserve(vm->root.base.bo);
d38ceaf9 2397
02208441
FK
2398 if (pasid) {
2399 unsigned long flags;
2400
2401 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2402 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2403 GFP_ATOMIC);
2404 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2405 if (r < 0)
2406 goto error_free_root;
2407
2408 vm->pasid = pasid;
0a096fb6
CK
2409 }
2410
a2f14820 2411 INIT_KFIFO(vm->faults);
c98171cc 2412 vm->fault_credit = 16;
d38ceaf9
AD
2413
2414 return 0;
2bd9ccfa 2415
13307f7e
CK
2416error_unreserve:
2417 amdgpu_bo_unreserve(vm->root.base.bo);
2418
67003a15 2419error_free_root:
3f3333f8
CK
2420 amdgpu_bo_unref(&vm->root.base.bo->shadow);
2421 amdgpu_bo_unref(&vm->root.base.bo);
2422 vm->root.base.bo = NULL;
2bd9ccfa
CK
2423
2424error_free_sched_entity:
1b1f42d8 2425 drm_sched_entity_fini(&ring->sched, &vm->entity);
2bd9ccfa
CK
2426
2427 return r;
d38ceaf9
AD
2428}
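/*
 * Usage sketch (illustrative): creating a compute VM that is tracked by a
 * PASID; a graphics context would pass AMDGPU_VM_CONTEXT_GFX instead and
 * usually pasid = 0:
 *
 *	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
 *	if (r)
 *		return r;
 */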
2429
f566ceb1
CK
2430/**
2431 * amdgpu_vm_free_levels - free PD/PT levels
2432 *
8f19cd78
CK
2433 * @adev: amdgpu device structure
2434 * @parent: PD/PT starting level to free
2435 * @level: level of parent structure
f566ceb1
CK
2436 *
2437 * Free the page directory or page table level and all sub levels.
2438 */
8f19cd78
CK
2439static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2440 struct amdgpu_vm_pt *parent,
2441 unsigned level)
f566ceb1 2442{
8f19cd78 2443 unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
f566ceb1 2444
8f19cd78
CK
2445 if (parent->base.bo) {
2446 list_del(&parent->base.bo_list);
2447 list_del(&parent->base.vm_status);
2448 amdgpu_bo_unref(&parent->base.bo->shadow);
2449 amdgpu_bo_unref(&parent->base.bo);
f566ceb1
CK
2450 }
2451
8f19cd78
CK
2452 if (parent->entries)
2453 for (i = 0; i < num_entries; i++)
2454 amdgpu_vm_free_levels(adev, &parent->entries[i],
2455 level + 1);
f566ceb1 2456
8f19cd78 2457 kvfree(parent->entries);
f566ceb1
CK
2458}
2459
d38ceaf9
AD
2460/**
2461 * amdgpu_vm_fini - tear down a vm instance
2462 *
2463 * @adev: amdgpu_device pointer
2464 * @vm: requested vm
2465 *
8843dbbb 2466 * Tear down @vm.
d38ceaf9
AD
2467 * Unbind the VM and remove all bos from the vm bo list
2468 */
2469void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2470{
2471 struct amdgpu_bo_va_mapping *mapping, *tmp;
132f34e4 2472 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2642cf11 2473 struct amdgpu_bo *root;
a2f14820 2474 u64 fault;
2642cf11 2475 int i, r;
d38ceaf9 2476
a2f14820
FK
2477 /* Clear pending page faults from IH when the VM is destroyed */
2478 while (kfifo_get(&vm->faults, &fault))
2479 amdgpu_ih_clear_fault(adev, fault);
2480
02208441
FK
2481 if (vm->pasid) {
2482 unsigned long flags;
2483
2484 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2485 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2486 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2487 }
2488
1b1f42d8 2489 drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2bd9ccfa 2490
f808c13f 2491 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
d38ceaf9
AD
2492 dev_err(adev->dev, "still active bo inside vm\n");
2493 }
f808c13f
DB
2494 rbtree_postorder_for_each_entry_safe(mapping, tmp,
2495 &vm->va.rb_root, rb) {
d38ceaf9 2496 list_del(&mapping->list);
a9f87f64 2497 amdgpu_vm_it_remove(mapping, &vm->va);
d38ceaf9
AD
2498 kfree(mapping);
2499 }
2500 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
4388fc2a 2501 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
451bc8eb 2502 amdgpu_vm_prt_fini(adev, vm);
4388fc2a 2503 prt_fini_needed = false;
451bc8eb 2504 }
284710fa 2505
d38ceaf9 2506 list_del(&mapping->list);
451bc8eb 2507 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
d38ceaf9
AD
2508 }
2509
2642cf11
CK
2510 root = amdgpu_bo_ref(vm->root.base.bo);
2511 r = amdgpu_bo_reserve(root, true);
2512 if (r) {
2513 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2514 } else {
196f7489
CZ
2515 amdgpu_vm_free_levels(adev, &vm->root,
2516 adev->vm_manager.root_level);
2642cf11
CK
2517 amdgpu_bo_unreserve(root);
2518 }
2519 amdgpu_bo_unref(&root);
d5884513 2520 dma_fence_put(vm->last_update);
1e9ef26f 2521 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
620f774f 2522 amdgpu_vmid_free_reserved(adev, vm, i);
d38ceaf9 2523}
ea89f8c9 2524
c98171cc
FK
2525/**
2526 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2527 *
2528 * @adev: amdgpu_device pointer
2529 * @pasid: PASID do identify the VM
2530 *
2531 * This function is expected to be called in interrupt context. Returns
2532 * true if there was fault credit, false otherwise
2533 */
2534bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2535 unsigned int pasid)
2536{
2537 struct amdgpu_vm *vm;
2538
2539 spin_lock(&adev->vm_manager.pasid_lock);
2540 vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
d958939a 2541 if (!vm) {
c98171cc 2542 /* VM not found, can't track fault credit */
d958939a 2543 spin_unlock(&adev->vm_manager.pasid_lock);
c98171cc 2544 return true;
d958939a 2545 }
c98171cc
FK
2546
2547 /* No lock needed, only accessed by the IRQ handler */
d958939a 2548 if (!vm->fault_credit) {
c98171cc 2549 /* Too many faults in this VM */
d958939a 2550 spin_unlock(&adev->vm_manager.pasid_lock);
c98171cc 2551 return false;
d958939a 2552 }
c98171cc
FK
2553
2554 vm->fault_credit--;
d958939a 2555 spin_unlock(&adev->vm_manager.pasid_lock);
c98171cc
FK
2556 return true;
2557}
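/*
 * Usage sketch (illustrative): an IH prescreen step can use the credit to
 * throttle page fault storms from a single process; when the credit is gone
 * the fault is simply dropped (the exact return convention depends on the
 * interrupt handler using this helper):
 *
 *	if (!amdgpu_vm_pasid_fault_credit(adev, pasid))
 *		return false;
 */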
2558
a9a78b32
CK
2559/**
2560 * amdgpu_vm_manager_init - init the VM manager
2561 *
2562 * @adev: amdgpu_device pointer
2563 *
2564 * Initialize the VM manager structures
2565 */
2566void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2567{
620f774f 2568 unsigned i;
a9a78b32 2569
620f774f 2570 amdgpu_vmid_mgr_init(adev);
2d55e45a 2571
f54d1867
CW
2572 adev->vm_manager.fence_context =
2573 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
1fbb2e92
CK
2574 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2575 adev->vm_manager.seqno[i] = 0;
2576
2d55e45a 2577 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
284710fa 2578 spin_lock_init(&adev->vm_manager.prt_lock);
451bc8eb 2579 atomic_set(&adev->vm_manager.num_prt_users, 0);
9a4b7d4c
HK
2580
2581 /* If not overridden by the user, Compute VM page tables are by default
2582 * updated by the CPU only on large BAR systems
2583 */
2584#ifdef CONFIG_X86_64
2585 if (amdgpu_vm_update_mode == -1) {
2586 if (amdgpu_vm_is_large_bar(adev))
2587 adev->vm_manager.vm_update_mode =
2588 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2589 else
2590 adev->vm_manager.vm_update_mode = 0;
2591 } else
2592 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2593#else
2594 adev->vm_manager.vm_update_mode = 0;
2595#endif
2596
02208441
FK
2597 idr_init(&adev->vm_manager.pasid_idr);
2598 spin_lock_init(&adev->vm_manager.pasid_lock);
a9a78b32
CK
2599}
2600
ea89f8c9
CK
2601/**
2602 * amdgpu_vm_manager_fini - cleanup VM manager
2603 *
2604 * @adev: amdgpu_device pointer
2605 *
2606 * Cleanup the VM manager and free resources.
2607 */
2608void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2609{
02208441
FK
2610 WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2611 idr_destroy(&adev->vm_manager.pasid_idr);
2612
620f774f 2613 amdgpu_vmid_mgr_fini(adev);
ea89f8c9 2614}
cfbcacf4
CZ
2615
2616int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2617{
2618 union drm_amdgpu_vm *args = data;
1e9ef26f
CZ
2619 struct amdgpu_device *adev = dev->dev_private;
2620 struct amdgpu_fpriv *fpriv = filp->driver_priv;
2621 int r;
cfbcacf4
CZ
2622
2623 switch (args->in.op) {
2624 case AMDGPU_VM_OP_RESERVE_VMID:
1e9ef26f 2625 /* currently, we only need to reserve a vmid from the gfxhub */
620f774f 2626 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
1e9ef26f
CZ
2627 if (r)
2628 return r;
2629 break;
cfbcacf4 2630 case AMDGPU_VM_OP_UNRESERVE_VMID:
620f774f 2631 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
cfbcacf4
CZ
2632 break;
2633 default:
2634 return -EINVAL;
2635 }
2636
2637 return 0;
2638}