/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu_trace.h"
/*
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 */
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)
/* Local structure. Encapsulate some VM table update parameters to reduce
 * the number of function parameters
 */
struct amdgpu_pte_update_params {
	/* amdgpu device we do this update for */
	struct amdgpu_device *adev;
	/* optional amdgpu_vm we do this update for */
	struct amdgpu_vm *vm;
	/* address where to copy page table entries from */
	uint64_t src;
	/* indirect buffer to fill with commands */
	struct amdgpu_ib *ib;
	/* Function which actually does the update */
	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
		     uint64_t addr, unsigned count, uint32_t incr,
		     uint64_t flags);
	/* The next two are used during VM update by CPU
	 *  DMA addresses to use for mapping
	 *  Kernel pointer of PD/PT BO that needs to be updated
	 */
	dma_addr_t *pages_addr;
	void *kptr;
};
/* Helper to disable partial resident texture feature from a fence callback */
struct amdgpu_prt_cb {
	struct amdgpu_device *adev;
	struct dma_fence_cb cb;
};
/**
 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the number of entries in a page directory or page table.
 */
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
				      unsigned level)
{
	if (level == 0)
		/* For the root directory */
		return adev->vm_manager.max_pfn >>
			(adev->vm_manager.block_size *
			 adev->vm_manager.num_level);
	else if (level == adev->vm_manager.num_level)
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
	else
		/* Everything in between */
		return 1 << adev->vm_manager.block_size;
}
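
/*
 * Worked example with illustrative values (not taken from any particular
 * ASIC): assuming block_size = 9 and num_level = 3, every interior level
 * and every leaf page table holds 1 << 9 = 512 entries, while the root
 * directory covers max_pfn >> (9 * 3) entries.  With max_pfn = 1 << 36
 * (a 48-bit virtual address space of 4 KiB pages) that is also 512 root
 * entries, so amdgpu_vm_bo_size() below returns 512 * 8 = 4096 bytes for
 * each directory or table.
 */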
/**
 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the size of the BO for a page directory or page table in bytes.
 */
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}
/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->root.bo;
	entry->tv.bo = &entry->robj->tbo;
	entry->tv.shared = true;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}
/**
 * amdgpu_vm_validate_level - validate a single page table level
 *
 * @parent: parent page table level
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 */
static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
				    int (*validate)(void *, struct amdgpu_bo *),
				    void *param, bool use_cpu_for_update,
				    struct ttm_bo_global *glob)
{
	unsigned i;
	int r;

	if (parent->bo->shadow) {
		struct amdgpu_bo *shadow = parent->bo->shadow;

		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
		if (r)
			return r;
	}

	if (use_cpu_for_update) {
		r = amdgpu_bo_kmap(parent->bo, NULL);
		if (r)
			return r;
	}

	if (!parent->entries)
		return 0;

	for (i = 0; i <= parent->last_entry_used; ++i) {
		struct amdgpu_vm_pt *entry = &parent->entries[i];

		if (!entry->bo)
			continue;

		r = validate(param, entry->bo);
		if (r)
			return r;

		spin_lock(&glob->lru_lock);
		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
		if (entry->bo->shadow)
			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
		spin_unlock(&glob->lru_lock);

		/*
		 * Recurse into the sub directory. This is harmless because we
		 * have only a maximum of 5 layers.
		 */
		r = amdgpu_vm_validate_level(entry, validate, param,
					     use_cpu_for_update, glob);
		if (r)
			return r;
	}

	return 0;
}
/**
 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 */
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*validate)(void *p, struct amdgpu_bo *bo),
			      void *param)
{
	uint64_t num_evictions;

	/* We only need to validate the page tables
	 * if they aren't already valid.
	 */
	num_evictions = atomic64_read(&adev->num_evictions);
	if (num_evictions == vm->last_eviction_counter)
		return 0;

	return amdgpu_vm_validate_level(&vm->root, validate, param,
					vm->use_cpu_for_update,
					adev->mman.bdev.glob);
}
/**
 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 *
 * @adev: amdgpu_device pointer
 * @saddr: start of the address range
 * @eaddr: end of the address range
 *
 * Make sure the page directories and page tables are allocated
 */
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt *parent,
				  uint64_t saddr, uint64_t eaddr,
				  unsigned level)
{
	unsigned shift = (adev->vm_manager.num_level - level) *
		adev->vm_manager.block_size;
	unsigned pt_idx, from, to;
	uint64_t flags;
	uint64_t init_value = 0;
	int r;

	if (!parent->entries) {
		unsigned num_entries = amdgpu_vm_num_entries(adev, level);

		parent->entries = kvmalloc_array(num_entries,
						 sizeof(struct amdgpu_vm_pt),
						 GFP_KERNEL | __GFP_ZERO);
		if (!parent->entries)
			return -ENOMEM;
		memset(parent->entries, 0, sizeof(struct amdgpu_vm_pt));
	}

	from = saddr >> shift;
	to = eaddr >> shift;
	if (from >= amdgpu_vm_num_entries(adev, level) ||
	    to >= amdgpu_vm_num_entries(adev, level))
		return -EINVAL;

	if (to > parent->last_entry_used)
		parent->last_entry_used = to;

	++level;
	saddr = saddr & ((1 << shift) - 1);
	eaddr = eaddr & ((1 << shift) - 1);

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
			AMDGPU_GEM_CREATE_VRAM_CLEARED;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
				AMDGPU_GEM_CREATE_SHADOW);

	if (vm->pte_support_ats) {
		init_value = AMDGPU_PTE_SYSTEM;
		if (level != adev->vm_manager.num_level - 1)
			init_value |= AMDGPU_PDE_PTE;
	}

	/* walk over the address space and allocate the page tables */
	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
		struct reservation_object *resv = vm->root.bo->tbo.resv;
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *pt;

		if (!entry->bo) {
			r = amdgpu_bo_create(adev,
					     amdgpu_vm_bo_size(adev, level),
					     AMDGPU_GPU_PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM, flags,
					     NULL, resv, init_value, &pt);
			if (r)
				return r;

			if (vm->use_cpu_for_update) {
				r = amdgpu_bo_kmap(pt, NULL);
				if (r) {
					amdgpu_bo_unref(&pt);
					return r;
				}
			}

			/* Keep a reference to the root directory to avoid
			 * freeing them up in the wrong order.
			 */
			pt->parent = amdgpu_bo_ref(vm->root.bo);

			entry->bo = pt;
			entry->addr = 0;
		}

		if (level < adev->vm_manager.num_level) {
			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
				((1 << shift) - 1);

			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
						   sub_eaddr, level);
			if (r)
				return r;
		}
	}

	return 0;
}
/**
 * amdgpu_vm_alloc_pts - Allocate page tables.
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @saddr: Start address which needs to be allocated
 * @size: Size from start address we need.
 *
 * Make sure the page tables are allocated.
 */
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size)
{
	uint64_t last_pfn;
	uint64_t eaddr;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	eaddr = saddr + size - 1;
	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
	if (last_pfn >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
			last_pfn, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
}
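
/*
 * Illustrative example with made-up numbers: saddr = 0x100000000 and
 * size = 0x200000 (2 MiB) pass the AMDGPU_GPU_PAGE_MASK alignment checks,
 * give eaddr = 0x1001fffff and last_pfn = 0x1001ff, and after dividing by
 * AMDGPU_GPU_PAGE_SIZE the levels are walked for the 512 GPU page indices
 * 0x100000 through 0x1001ff.
 */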
/**
 * amdgpu_vm_had_gpu_reset - check if reset occurred since last use
 *
 * @adev: amdgpu_device pointer
 * @id: VMID structure
 *
 * Check if GPU reset occurred since last use of the VMID.
 */
static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
				    struct amdgpu_vm_id *id)
{
	return id->current_gpu_reset_count !=
		atomic_read(&adev->gpu_reset_counter);
}

static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
{
	return !!vm->reserved_vmid[vmhub];
}
/* id_mgr->lock must be held */
static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
					       struct amdgpu_ring *ring,
					       struct amdgpu_sync *sync,
					       struct dma_fence *fence,
					       struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	uint64_t fence_context = adev->fence_context + ring->idx;
	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct dma_fence *updates = sync->last_vm_update;
	int r = 0;
	struct dma_fence *flushed, *tmp;
	bool needs_flush = vm->use_cpu_for_update;

	flushed = id->flushed_updates;
	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
	    (atomic64_read(&id->owner) != vm->client_id) ||
	    (job->vm_pd_addr != id->pd_gpu_addr) ||
	    (updates && (!flushed || updates->context != flushed->context ||
			 dma_fence_is_later(updates, flushed))) ||
	    (!id->last_flush || (id->last_flush->context != fence_context &&
				 !dma_fence_is_signaled(id->last_flush)))) {
		needs_flush = true;
		/* to prevent one context starved by another context */
		id->pd_gpu_addr = 0;
		tmp = amdgpu_sync_peek_fence(&id->active, ring);
		if (tmp) {
			r = amdgpu_sync_fence(adev, sync, tmp);
			return r;
		}
	}

	/* Good we can use this VMID. Remember this submission as
	 * user of the VMID.
	 */
	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
	if (r)
		goto out;

	if (updates && (!flushed || updates->context != flushed->context ||
			dma_fence_is_later(updates, flushed))) {
		dma_fence_put(id->flushed_updates);
		id->flushed_updates = dma_fence_get(updates);
	}
	id->pd_gpu_addr = job->vm_pd_addr;
	atomic64_set(&id->owner, vm->client_id);
	job->vm_needs_flush = needs_flush;
	if (needs_flush) {
		dma_fence_put(id->last_flush);
		id->last_flush = NULL;
	}
	job->vm_id = id - id_mgr->ids;
	trace_amdgpu_vm_grab_id(vm, ring, job);
out:
	return r;
}
/**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 */
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		      struct amdgpu_sync *sync, struct dma_fence *fence,
		      struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	uint64_t fence_context = adev->fence_context + ring->idx;
	struct dma_fence *updates = sync->last_vm_update;
	struct amdgpu_vm_id *id, *idle;
	struct dma_fence **fences;
	unsigned i;
	int r;

	mutex_lock(&id_mgr->lock);
	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
		mutex_unlock(&id_mgr->lock);
		return r;
	}
	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
	if (!fences) {
		mutex_unlock(&id_mgr->lock);
		return -ENOMEM;
	}
500 /* Check if we have an idle VMID */
502 list_for_each_entry(idle
, &id_mgr
->ids_lru
, list
) {
503 fences
[i
] = amdgpu_sync_peek_fence(&idle
->active
, ring
);
	/* If we can't find an idle VMID to use, wait till one becomes available */
510 if (&idle
->list
== &id_mgr
->ids_lru
) {
511 u64 fence_context
= adev
->vm_manager
.fence_context
+ ring
->idx
;
512 unsigned seqno
= ++adev
->vm_manager
.seqno
[ring
->idx
];
513 struct dma_fence_array
*array
;
516 for (j
= 0; j
< i
; ++j
)
517 dma_fence_get(fences
[j
]);
519 array
= dma_fence_array_create(i
, fences
, fence_context
,
522 for (j
= 0; j
< i
; ++j
)
523 dma_fence_put(fences
[j
]);
530 r
= amdgpu_sync_fence(ring
->adev
, sync
, &array
->base
);
531 dma_fence_put(&array
->base
);
535 mutex_unlock(&id_mgr
->lock
);
541 job
->vm_needs_flush
= vm
->use_cpu_for_update
;
542 /* Check if we can use a VMID already assigned to this VM */
543 list_for_each_entry_reverse(id
, &id_mgr
->ids_lru
, list
) {
544 struct dma_fence
*flushed
;
545 bool needs_flush
= vm
->use_cpu_for_update
;
547 /* Check all the prerequisites to using this VMID */
548 if (amdgpu_vm_had_gpu_reset(adev
, id
))
551 if (atomic64_read(&id
->owner
) != vm
->client_id
)
554 if (job
->vm_pd_addr
!= id
->pd_gpu_addr
)
557 if (!id
->last_flush
||
558 (id
->last_flush
->context
!= fence_context
&&
559 !dma_fence_is_signaled(id
->last_flush
)))
562 flushed
= id
->flushed_updates
;
563 if (updates
&& (!flushed
|| dma_fence_is_later(updates
, flushed
)))
566 /* Concurrent flushes are only possible starting with Vega10 */
567 if (adev
->asic_type
< CHIP_VEGA10
&& needs_flush
)
570 /* Good we can use this VMID. Remember this submission as
573 r
= amdgpu_sync_fence(ring
->adev
, &id
->active
, fence
);
577 if (updates
&& (!flushed
|| dma_fence_is_later(updates
, flushed
))) {
578 dma_fence_put(id
->flushed_updates
);
579 id
->flushed_updates
= dma_fence_get(updates
);
585 goto no_flush_needed
;
589 /* Still no ID to use? Then use the idle one found earlier */
592 /* Remember this submission as user of the VMID */
593 r
= amdgpu_sync_fence(ring
->adev
, &id
->active
, fence
);
597 id
->pd_gpu_addr
= job
->vm_pd_addr
;
598 dma_fence_put(id
->flushed_updates
);
599 id
->flushed_updates
= dma_fence_get(updates
);
600 atomic64_set(&id
->owner
, vm
->client_id
);
603 job
->vm_needs_flush
= true;
604 dma_fence_put(id
->last_flush
);
605 id
->last_flush
= NULL
;
608 list_move_tail(&id
->list
, &id_mgr
->ids_lru
);
610 job
->vm_id
= id
- id_mgr
->ids
;
611 trace_amdgpu_vm_grab_id(vm
, ring
, job
);
614 mutex_unlock(&id_mgr
->lock
);
static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
					 struct amdgpu_vm *vm,
					 unsigned vmhub)
{
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];

	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub]) {
		list_add(&vm->reserved_vmid[vmhub]->list,
			 &id_mgr->ids_lru);
		vm->reserved_vmid[vmhub] = NULL;
		atomic_dec(&id_mgr->reserved_vmid_num);
	}
	mutex_unlock(&id_mgr->lock);
}
static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
					 struct amdgpu_vm *vm,
					 unsigned vmhub)
{
	struct amdgpu_vm_id_manager *id_mgr;
	struct amdgpu_vm_id *idle;
	int r = 0;

	id_mgr = &adev->vm_manager.id_mgr[vmhub];
	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub])
		goto unlock;
	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
	    AMDGPU_VM_MAX_RESERVED_VMID) {
		DRM_ERROR("Over limitation of reserved vmid\n");
		atomic_dec(&id_mgr->reserved_vmid_num);
		r = -EINVAL;
		goto unlock;
	}
	/* Select the first entry VMID */
	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
	list_del_init(&idle->list);
	vm->reserved_vmid[vmhub] = idle;
	mutex_unlock(&id_mgr->lock);

	return 0;
unlock:
	mutex_unlock(&id_mgr->lock);
	return r;
}
/**
 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 *
 * @adev: amdgpu_device pointer
 */
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
	const struct amdgpu_ip_block *ip_block;
	bool has_compute_vm_bug;
	struct amdgpu_ring *ring;
	int i;

	has_compute_vm_bug = false;

	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block) {
		/* Compute has a VM bug for GFX version < 7.
		 * Compute has a VM bug for GFX 8 MEC firmware version < 673.
		 */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vm_id *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	id = &id_mgr->ids[job->vm_id];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vm_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}
static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
}
/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @vm_id: vmid number to use
 * @pd_addr: address of the page directory
 *
 * Emit a VM flush when it is necessary.
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);
	bool vm_flush_needed = job->vm_needs_flush;
	unsigned patch_offset = 0;
	int r;

	if (amdgpu_vm_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
	}

	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
		struct dma_fence *fence;

		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);

		r = amdgpu_fence_emit(ring, &fence);
		if (r)
			return r;

		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = fence;
		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
		id->gds_base = job->gds_base;
		id->gds_size = job->gds_size;
		id->gws_base = job->gws_base;
		id->gws_size = job->gws_size;
		id->oa_base = job->oa_base;
		id->oa_size = job->oa_size;
		amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	return 0;
}
/**
 * amdgpu_vm_reset_id - reset VMID to zero
 *
 * @adev: amdgpu device structure
 * @vm_id: vmid number to use
 *
 * Reset saved GDS, GWS and OA to force switch on next flush.
 */
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
			unsigned vmid)
{
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];

	atomic64_set(&id->owner, 0);
	id->gds_base = 0;
	id->gds_size = 0;
	id->gws_base = 0;
	id->gws_size = 0;
	id->oa_base = 0;
	id->oa_size = 0;
}
/**
 * amdgpu_vm_reset_all_ids - reset VMID to zero
 *
 * @adev: amdgpu device structure
 *
 * Reset VMID to force flush on next use
 */
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vm_id_manager *id_mgr =
			&adev->vm_manager.id_mgr[i];

		for (j = 1; j < id_mgr->num_ids; ++j)
			amdgpu_vm_reset_id(adev, i, j);
	}
}
/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bos vm list for the requested vm
 * Returns the found bo_va or NULL if none is found
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
		if (bo_va->base.vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}
/**
 * amdgpu_vm_do_set_ptes - helper to call the right asic function
 *
 * @params: see amdgpu_pte_update_params definition
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
				  uint64_t pe, uint64_t addr,
				  unsigned count, uint32_t incr,
				  uint64_t flags)
{
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	if (count < 3) {
		amdgpu_vm_write_pte(params->adev, params->ib, pe,
				    addr | flags, count, incr);
	} else {
		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
				      count, incr, flags);
	}
}
/**
 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 *
 * @params: see amdgpu_pte_update_params definition
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	uint64_t src = (params->src + (addr >> 12) * 8);

	trace_amdgpu_vm_copy_ptes(pe, src, count);

	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
}
/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to and return the pointer for the page table entry.
 */
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size*/
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}
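
/*
 * Worked example under the assumption of 64 KiB CPU pages and 4 KiB GPU
 * pages: for addr = 0x12345, pages_addr[0x1] supplies the DMA address of
 * the backing CPU page, the in-page offset 0x2345 is OR'ed back in, and
 * the final mask keeps the result 4 KiB aligned, i.e. the GPU page at
 * offset 0x2000 inside that CPU page.  With equal page sizes the offset
 * bits are simply masked off again.
 */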
/**
 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 *
 * @params: see amdgpu_pte_update_params definition
 * @pe: kmap addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Write count number of PT/PD entries directly.
 */
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	unsigned int i;
	uint64_t value;

	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	for (i = 0; i < count; i++) {
		value = params->pages_addr ?
			amdgpu_vm_map_gart(params->pages_addr, addr) :
			addr;
		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
					i, value, flags);
		addr += incr;
	}
}
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			     void *owner)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

	return r;
}
/**
 * amdgpu_vm_update_level - update a single level in the hierarchy
 *
 * @adev: amdgpu_device pointer
 * @parent: parent directory
 *
 * Makes sure all entries in @parent are up to date.
 * Returns 0 for success, error for failure.
 */
1016 static int amdgpu_vm_update_level(struct amdgpu_device
*adev
,
1017 struct amdgpu_vm
*vm
,
1018 struct amdgpu_vm_pt
*parent
,
1021 struct amdgpu_bo
*shadow
;
1022 struct amdgpu_ring
*ring
= NULL
;
1023 uint64_t pd_addr
, shadow_addr
= 0;
1024 uint32_t incr
= amdgpu_vm_bo_size(adev
, level
+ 1);
1025 uint64_t last_pde
= ~0, last_pt
= ~0, last_shadow
= ~0;
1026 unsigned count
= 0, pt_idx
, ndw
= 0;
1027 struct amdgpu_job
*job
;
1028 struct amdgpu_pte_update_params params
;
1029 struct dma_fence
*fence
= NULL
;
1033 if (!parent
->entries
)
1036 memset(¶ms
, 0, sizeof(params
));
1038 shadow
= parent
->bo
->shadow
;
1040 if (vm
->use_cpu_for_update
) {
1041 pd_addr
= (unsigned long)amdgpu_bo_kptr(parent
->bo
);
1042 r
= amdgpu_vm_wait_pd(adev
, vm
, AMDGPU_FENCE_OWNER_VM
);
1046 params
.func
= amdgpu_vm_cpu_set_ptes
;
1048 ring
= container_of(vm
->entity
.sched
, struct amdgpu_ring
,
1054 /* assume the worst case */
1055 ndw
+= parent
->last_entry_used
* 6;
1057 pd_addr
= amdgpu_bo_gpu_offset(parent
->bo
);
1060 shadow_addr
= amdgpu_bo_gpu_offset(shadow
);
1066 r
= amdgpu_job_alloc_with_ib(adev
, ndw
* 4, &job
);
1070 params
.ib
= &job
->ibs
[0];
1071 params
.func
= amdgpu_vm_do_set_ptes
;
1075 /* walk over the address space and update the directory */
1076 for (pt_idx
= 0; pt_idx
<= parent
->last_entry_used
; ++pt_idx
) {
1077 struct amdgpu_bo
*bo
= parent
->entries
[pt_idx
].bo
;
1083 pt
= amdgpu_bo_gpu_offset(bo
);
1084 pt
= amdgpu_gart_get_vm_pde(adev
, pt
);
1085 /* Don't update huge pages here */
1086 if ((parent
->entries
[pt_idx
].addr
& AMDGPU_PDE_PTE
) ||
1087 parent
->entries
[pt_idx
].addr
== (pt
| AMDGPU_PTE_VALID
))
1090 parent
->entries
[pt_idx
].addr
= pt
| AMDGPU_PTE_VALID
;
1092 pde
= pd_addr
+ pt_idx
* 8;
1093 if (((last_pde
+ 8 * count
) != pde
) ||
1094 ((last_pt
+ incr
* count
) != pt
) ||
1095 (count
== AMDGPU_VM_MAX_UPDATE_SIZE
)) {
1099 params
.func(¶ms
,
1105 params
.func(¶ms
, last_pde
,
1106 last_pt
, count
, incr
,
1112 last_shadow
= shadow_addr
+ pt_idx
* 8;
1120 if (vm
->root
.bo
->shadow
)
1121 params
.func(¶ms
, last_shadow
, last_pt
,
1122 count
, incr
, AMDGPU_PTE_VALID
);
1124 params
.func(¶ms
, last_pde
, last_pt
,
1125 count
, incr
, AMDGPU_PTE_VALID
);
1128 if (!vm
->use_cpu_for_update
) {
1129 if (params
.ib
->length_dw
== 0) {
1130 amdgpu_job_free(job
);
1132 amdgpu_ring_pad_ib(ring
, params
.ib
);
1133 amdgpu_sync_resv(adev
, &job
->sync
, parent
->bo
->tbo
.resv
,
1134 AMDGPU_FENCE_OWNER_VM
);
1136 amdgpu_sync_resv(adev
, &job
->sync
,
1138 AMDGPU_FENCE_OWNER_VM
);
1140 WARN_ON(params
.ib
->length_dw
> ndw
);
1141 r
= amdgpu_job_submit(job
, ring
, &vm
->entity
,
1142 AMDGPU_FENCE_OWNER_VM
, &fence
);
1146 amdgpu_bo_fence(parent
->bo
, fence
, true);
1147 dma_fence_put(vm
->last_dir_update
);
1148 vm
->last_dir_update
= dma_fence_get(fence
);
1149 dma_fence_put(fence
);
1153 * Recurse into the subdirectories. This recursion is harmless because
1154 * we only have a maximum of 5 layers.
1156 for (pt_idx
= 0; pt_idx
<= parent
->last_entry_used
; ++pt_idx
) {
1157 struct amdgpu_vm_pt
*entry
= &parent
->entries
[pt_idx
];
1162 r
= amdgpu_vm_update_level(adev
, vm
, entry
, level
+ 1);
1170 amdgpu_job_free(job
);
/**
 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 *
 * @parent: parent PD
 *
 * Mark all PD levels as invalid after an error.
 */
1181 static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt
*parent
)
1186 * Recurse into the subdirectories. This recursion is harmless because
1187 * we only have a maximum of 5 layers.
1189 for (pt_idx
= 0; pt_idx
<= parent
->last_entry_used
; ++pt_idx
) {
1190 struct amdgpu_vm_pt
*entry
= &parent
->entries
[pt_idx
];
1195 entry
->addr
= ~0ULL;
1196 amdgpu_vm_invalidate_level(entry
);
/**
 * amdgpu_vm_update_directories - make sure that all directories are valid
 *
 * @adev: amdgpu_device pointer
 *
 * Makes sure all directories are up to date.
 * Returns 0 for success, error for failure.
 */
1209 int amdgpu_vm_update_directories(struct amdgpu_device
*adev
,
1210 struct amdgpu_vm
*vm
)
1214 r
= amdgpu_vm_update_level(adev
, vm
, &vm
->root
, 0);
1216 amdgpu_vm_invalidate_level(&vm
->root
);
1218 if (vm
->use_cpu_for_update
) {
1221 amdgpu_gart_flush_gpu_tlb(adev
, 0);
/**
 * amdgpu_vm_get_entry - find the entry for an address
 *
 * @p: see amdgpu_pte_update_params definition
 * @addr: virtual address in question
 * @entry: resulting entry or NULL
 * @parent: parent entry
 *
 * Find the vm_pt entry and its parent for the given address.
 */
1237 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params
*p
, uint64_t addr
,
1238 struct amdgpu_vm_pt
**entry
,
1239 struct amdgpu_vm_pt
**parent
)
1241 unsigned idx
, level
= p
->adev
->vm_manager
.num_level
;
1244 *entry
= &p
->vm
->root
;
1245 while ((*entry
)->entries
) {
1246 idx
= addr
>> (p
->adev
->vm_manager
.block_size
* level
--);
1247 idx
%= amdgpu_bo_size((*entry
)->bo
) / 8;
1249 *entry
= &(*entry
)->entries
[idx
];
/**
 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
 *
 * @p: see amdgpu_pte_update_params definition
 * @entry: vm_pt entry to check
 * @parent: parent entry
 * @nptes: number of PTEs updated with this operation
 * @dst: destination address where the PTEs should point to
 * @flags: access flags for the PTEs
 *
 * Check if we can update the PD with a huge page.
 */
1268 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params
*p
,
1269 struct amdgpu_vm_pt
*entry
,
1270 struct amdgpu_vm_pt
*parent
,
1271 unsigned nptes
, uint64_t dst
,
1274 bool use_cpu_update
= (p
->func
== amdgpu_vm_cpu_set_ptes
);
1275 uint64_t pd_addr
, pde
;
1277 /* In the case of a mixed PT the PDE must point to it*/
1278 if (p
->adev
->asic_type
< CHIP_VEGA10
||
1279 nptes
!= AMDGPU_VM_PTE_COUNT(p
->adev
) ||
1280 p
->func
== amdgpu_vm_do_copy_ptes
||
1281 !(flags
& AMDGPU_PTE_VALID
)) {
1283 dst
= amdgpu_bo_gpu_offset(entry
->bo
);
1284 dst
= amdgpu_gart_get_vm_pde(p
->adev
, dst
);
1285 flags
= AMDGPU_PTE_VALID
;
1287 /* Set the huge page flag to stop scanning at this PDE */
1288 flags
|= AMDGPU_PDE_PTE
;
1291 if (entry
->addr
== (dst
| flags
))
1294 entry
->addr
= (dst
| flags
);
1296 if (use_cpu_update
) {
1297 pd_addr
= (unsigned long)amdgpu_bo_kptr(parent
->bo
);
1298 pde
= pd_addr
+ (entry
- parent
->entries
) * 8;
1299 amdgpu_vm_cpu_set_ptes(p
, pde
, dst
, 1, 0, flags
);
1301 if (parent
->bo
->shadow
) {
1302 pd_addr
= amdgpu_bo_gpu_offset(parent
->bo
->shadow
);
1303 pde
= pd_addr
+ (entry
- parent
->entries
) * 8;
1304 amdgpu_vm_do_set_ptes(p
, pde
, dst
, 1, 0, flags
);
1306 pd_addr
= amdgpu_bo_gpu_offset(parent
->bo
);
1307 pde
= pd_addr
+ (entry
- parent
->entries
) * 8;
1308 amdgpu_vm_do_set_ptes(p
, pde
, dst
, 1, 0, flags
);
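
/*
 * Illustrative note: with a 9 bit block_size a leaf page table covers
 * 512 * 4 KiB = 2 MiB.  When an update writes all AMDGPU_VM_PTE_COUNT()
 * entries of such a block contiguously (possible on Vega10 and later),
 * the PDE above it is tagged with AMDGPU_PDE_PTE and
 * amdgpu_vm_update_ptes() below can skip the individual PTEs for that
 * 2 MiB range.
 */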
/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @params: see amdgpu_pte_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 * Returns 0 for success, -EINVAL for failure.
 */
1325 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
*params
,
1326 uint64_t start
, uint64_t end
,
1327 uint64_t dst
, uint64_t flags
)
1329 struct amdgpu_device
*adev
= params
->adev
;
1330 const uint64_t mask
= AMDGPU_VM_PTE_COUNT(adev
) - 1;
1332 uint64_t addr
, pe_start
;
1333 struct amdgpu_bo
*pt
;
1335 bool use_cpu_update
= (params
->func
== amdgpu_vm_cpu_set_ptes
);
1337 /* walk over the address space and update the page tables */
1338 for (addr
= start
; addr
< end
; addr
+= nptes
,
1339 dst
+= nptes
* AMDGPU_GPU_PAGE_SIZE
) {
1340 struct amdgpu_vm_pt
*entry
, *parent
;
1342 amdgpu_vm_get_entry(params
, addr
, &entry
, &parent
);
1346 if ((addr
& ~mask
) == (end
& ~mask
))
1349 nptes
= AMDGPU_VM_PTE_COUNT(adev
) - (addr
& mask
);
1351 amdgpu_vm_handle_huge_pages(params
, entry
, parent
,
1353 /* We don't need to update PTEs for huge pages */
1354 if (entry
->addr
& AMDGPU_PDE_PTE
)
1358 if (use_cpu_update
) {
1359 pe_start
= (unsigned long)amdgpu_bo_kptr(pt
);
1362 pe_start
= amdgpu_bo_gpu_offset(pt
->shadow
);
1363 pe_start
+= (addr
& mask
) * 8;
1364 params
->func(params
, pe_start
, dst
, nptes
,
1365 AMDGPU_GPU_PAGE_SIZE
, flags
);
1367 pe_start
= amdgpu_bo_gpu_offset(pt
);
1370 pe_start
+= (addr
& mask
) * 8;
1371 params
->func(params
, pe_start
, dst
, nptes
,
1372 AMDGPU_GPU_PAGE_SIZE
, flags
);
/**
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @params: see amdgpu_pte_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @dst: addr those PTEs should point to
 * @flags: hw mapping flags
 * Returns 0 for success, -EINVAL for failure.
 */
1389 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params
*params
,
1390 uint64_t start
, uint64_t end
,
1391 uint64_t dst
, uint64_t flags
)
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */
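	/*
	 * Illustrative numbers: with fragment_size = 9 a fragment spans
	 * 1 << 9 = 512 GPU pages, i.e. (1 << (12 + 9)) = 2 MiB of contiguous,
	 * 2 MiB aligned address space.  A mapping aligned to that size gets
	 * its middle section written with AMDGPU_PTE_FRAG(9) below, while the
	 * unaligned head and tail are handled as plain 4 KiB PTEs.
	 */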
1413 unsigned pages_per_frag
= params
->adev
->vm_manager
.fragment_size
;
1414 uint64_t frag_flags
= AMDGPU_PTE_FRAG(pages_per_frag
);
1415 uint64_t frag_align
= 1 << pages_per_frag
;
1417 uint64_t frag_start
= ALIGN(start
, frag_align
);
1418 uint64_t frag_end
= end
& ~(frag_align
- 1);
1420 /* system pages are non continuously */
1421 if (params
->src
|| !(flags
& AMDGPU_PTE_VALID
) ||
1422 (frag_start
>= frag_end
))
1423 return amdgpu_vm_update_ptes(params
, start
, end
, dst
, flags
);
1425 /* handle the 4K area at the beginning */
1426 if (start
!= frag_start
) {
1427 r
= amdgpu_vm_update_ptes(params
, start
, frag_start
,
1431 dst
+= (frag_start
- start
) * AMDGPU_GPU_PAGE_SIZE
;
1434 /* handle the area in the middle */
1435 r
= amdgpu_vm_update_ptes(params
, frag_start
, frag_end
, dst
,
1436 flags
| frag_flags
);
1440 /* handle the 4K area at the end */
1441 if (frag_end
!= end
) {
1442 dst
+= (frag_end
- frag_start
) * AMDGPU_GPU_PAGE_SIZE
;
1443 r
= amdgpu_vm_update_ptes(params
, frag_end
, end
, dst
, flags
);
1449 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1451 * @adev: amdgpu_device pointer
1452 * @exclusive: fence we need to sync to
1453 * @src: address where to copy page table entries from
1454 * @pages_addr: DMA addresses to use for mapping
1456 * @start: start of mapped range
1457 * @last: last mapped entry
1458 * @flags: flags for the entries
1459 * @addr: addr to set the area to
1460 * @fence: optional resulting fence
1462 * Fill in the page table entries between @start and @last.
1463 * Returns 0 for success, -EINVAL for failure.
1465 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device
*adev
,
1466 struct dma_fence
*exclusive
,
1468 dma_addr_t
*pages_addr
,
1469 struct amdgpu_vm
*vm
,
1470 uint64_t start
, uint64_t last
,
1471 uint64_t flags
, uint64_t addr
,
1472 struct dma_fence
**fence
)
1474 struct amdgpu_ring
*ring
;
1475 void *owner
= AMDGPU_FENCE_OWNER_VM
;
1476 unsigned nptes
, ncmds
, ndw
;
1477 struct amdgpu_job
*job
;
1478 struct amdgpu_pte_update_params params
;
1479 struct dma_fence
*f
= NULL
;
1482 memset(¶ms
, 0, sizeof(params
));
1487 /* sync to everything on unmapping */
1488 if (!(flags
& AMDGPU_PTE_VALID
))
1489 owner
= AMDGPU_FENCE_OWNER_UNDEFINED
;
1491 if (vm
->use_cpu_for_update
) {
1492 /* params.src is used as flag to indicate system Memory */
1496 /* Wait for PT BOs to be free. PTs share the same resv. object
1499 r
= amdgpu_vm_wait_pd(adev
, vm
, owner
);
1503 params
.func
= amdgpu_vm_cpu_set_ptes
;
1504 params
.pages_addr
= pages_addr
;
1505 return amdgpu_vm_frag_ptes(¶ms
, start
, last
+ 1,
1509 ring
= container_of(vm
->entity
.sched
, struct amdgpu_ring
, sched
);
1511 nptes
= last
- start
+ 1;
1514 * reserve space for one command every (1 << BLOCK_SIZE)
1515 * entries or 2k dwords (whatever is smaller)
1517 ncmds
= (nptes
>> min(adev
->vm_manager
.block_size
, 11u)) + 1;
1522 /* one PDE write for each huge page */
1523 ndw
+= ((nptes
>> adev
->vm_manager
.block_size
) + 1) * 6;
1526 /* only copy commands needed */
1529 params
.func
= amdgpu_vm_do_copy_ptes
;
1531 } else if (pages_addr
) {
1532 /* copy commands needed */
1538 params
.func
= amdgpu_vm_do_copy_ptes
;
1541 /* set page commands needed */
1544 /* two extra commands for begin/end of fragment */
1547 params
.func
= amdgpu_vm_do_set_ptes
;
1550 r
= amdgpu_job_alloc_with_ib(adev
, ndw
* 4, &job
);
1554 params
.ib
= &job
->ibs
[0];
1556 if (!src
&& pages_addr
) {
1560 /* Put the PTEs at the end of the IB. */
1561 i
= ndw
- nptes
* 2;
1562 pte
= (uint64_t *)&(job
->ibs
->ptr
[i
]);
1563 params
.src
= job
->ibs
->gpu_addr
+ i
* 4;
1565 for (i
= 0; i
< nptes
; ++i
) {
1566 pte
[i
] = amdgpu_vm_map_gart(pages_addr
, addr
+ i
*
1567 AMDGPU_GPU_PAGE_SIZE
);
1573 r
= amdgpu_sync_fence(adev
, &job
->sync
, exclusive
);
1577 r
= amdgpu_sync_resv(adev
, &job
->sync
, vm
->root
.bo
->tbo
.resv
,
1582 r
= reservation_object_reserve_shared(vm
->root
.bo
->tbo
.resv
);
1586 r
= amdgpu_vm_frag_ptes(¶ms
, start
, last
+ 1, addr
, flags
);
1590 amdgpu_ring_pad_ib(ring
, params
.ib
);
1591 WARN_ON(params
.ib
->length_dw
> ndw
);
1592 r
= amdgpu_job_submit(job
, ring
, &vm
->entity
,
1593 AMDGPU_FENCE_OWNER_VM
, &f
);
1597 amdgpu_bo_fence(vm
->root
.bo
, f
, true);
1598 dma_fence_put(*fence
);
1603 amdgpu_job_free(job
);
1604 amdgpu_vm_invalidate_level(&vm
->root
);
1609 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1611 * @adev: amdgpu_device pointer
1612 * @exclusive: fence we need to sync to
1613 * @gtt_flags: flags as they are used for GTT
1614 * @pages_addr: DMA addresses to use for mapping
1616 * @mapping: mapped range and flags to use for the update
1617 * @flags: HW flags for the mapping
1618 * @nodes: array of drm_mm_nodes with the MC addresses
1619 * @fence: optional resulting fence
1621 * Split the mapping into smaller chunks so that each update fits
1623 * Returns 0 for success, -EINVAL for failure.
1625 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device
*adev
,
1626 struct dma_fence
*exclusive
,
1628 dma_addr_t
*pages_addr
,
1629 struct amdgpu_vm
*vm
,
1630 struct amdgpu_bo_va_mapping
*mapping
,
1632 struct drm_mm_node
*nodes
,
1633 struct dma_fence
**fence
)
1635 uint64_t pfn
, src
= 0, start
= mapping
->start
;
	/* normally, bo_va->flags only contains the READABLE and WRITEABLE bits,
	 * but we filter the flags here first just in case
	 */
1641 if (!(mapping
->flags
& AMDGPU_PTE_READABLE
))
1642 flags
&= ~AMDGPU_PTE_READABLE
;
1643 if (!(mapping
->flags
& AMDGPU_PTE_WRITEABLE
))
1644 flags
&= ~AMDGPU_PTE_WRITEABLE
;
1646 flags
&= ~AMDGPU_PTE_EXECUTABLE
;
1647 flags
|= mapping
->flags
& AMDGPU_PTE_EXECUTABLE
;
1649 flags
&= ~AMDGPU_PTE_MTYPE_MASK
;
1650 flags
|= (mapping
->flags
& AMDGPU_PTE_MTYPE_MASK
);
1652 if ((mapping
->flags
& AMDGPU_PTE_PRT
) &&
1653 (adev
->asic_type
>= CHIP_VEGA10
)) {
1654 flags
|= AMDGPU_PTE_PRT
;
1655 flags
&= ~AMDGPU_PTE_VALID
;
1658 trace_amdgpu_vm_bo_update(mapping
);
1660 pfn
= mapping
->offset
>> PAGE_SHIFT
;
1662 while (pfn
>= nodes
->size
) {
1669 uint64_t max_entries
;
1670 uint64_t addr
, last
;
1673 addr
= nodes
->start
<< PAGE_SHIFT
;
1674 max_entries
= (nodes
->size
- pfn
) *
1675 (PAGE_SIZE
/ AMDGPU_GPU_PAGE_SIZE
);
1678 max_entries
= S64_MAX
;
1682 if (flags
== gtt_flags
)
1683 src
= adev
->gart
.table_addr
+
1684 (addr
>> AMDGPU_GPU_PAGE_SHIFT
) * 8;
1686 max_entries
= min(max_entries
, 16ull * 1024ull);
1688 } else if (flags
& AMDGPU_PTE_VALID
) {
1689 addr
+= adev
->vm_manager
.vram_base_offset
;
1691 addr
+= pfn
<< PAGE_SHIFT
;
1693 last
= min((uint64_t)mapping
->last
, start
+ max_entries
- 1);
1694 r
= amdgpu_vm_bo_update_mapping(adev
, exclusive
,
1695 src
, pages_addr
, vm
,
1696 start
, last
, flags
, addr
,
1701 pfn
+= last
- start
+ 1;
1702 if (nodes
&& nodes
->size
== pfn
) {
1708 } while (unlikely(start
!= mapping
->last
+ 1));
1714 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1716 * @adev: amdgpu_device pointer
1717 * @bo_va: requested BO and VM object
1718 * @clear: if true clear the entries
1720 * Fill in the page table entries for @bo_va.
1721 * Returns 0 for success, -EINVAL for failure.
1723 int amdgpu_vm_bo_update(struct amdgpu_device
*adev
,
1724 struct amdgpu_bo_va
*bo_va
,
1727 struct amdgpu_bo
*bo
= bo_va
->base
.bo
;
1728 struct amdgpu_vm
*vm
= bo_va
->base
.vm
;
1729 struct amdgpu_bo_va_mapping
*mapping
;
1730 dma_addr_t
*pages_addr
= NULL
;
1731 uint64_t gtt_flags
, flags
;
1732 struct ttm_mem_reg
*mem
;
1733 struct drm_mm_node
*nodes
;
1734 struct dma_fence
*exclusive
;
1737 if (clear
|| !bo_va
->base
.bo
) {
1742 struct ttm_dma_tt
*ttm
;
1744 mem
= &bo_va
->base
.bo
->tbo
.mem
;
1745 nodes
= mem
->mm_node
;
1746 if (mem
->mem_type
== TTM_PL_TT
) {
1747 ttm
= container_of(bo_va
->base
.bo
->tbo
.ttm
,
1748 struct ttm_dma_tt
, ttm
);
1749 pages_addr
= ttm
->dma_address
;
1751 exclusive
= reservation_object_get_excl(bo
->tbo
.resv
);
1755 flags
= amdgpu_ttm_tt_pte_flags(adev
, bo
->tbo
.ttm
, mem
);
1756 gtt_flags
= (amdgpu_ttm_is_bound(bo
->tbo
.ttm
) &&
1757 adev
== amdgpu_ttm_adev(bo
->tbo
.bdev
)) ?
1764 spin_lock(&vm
->status_lock
);
1765 if (!list_empty(&bo_va
->base
.vm_status
))
1766 list_splice_init(&bo_va
->valids
, &bo_va
->invalids
);
1767 spin_unlock(&vm
->status_lock
);
1769 list_for_each_entry(mapping
, &bo_va
->invalids
, list
) {
1770 r
= amdgpu_vm_bo_split_mapping(adev
, exclusive
,
1771 gtt_flags
, pages_addr
, vm
,
1772 mapping
, flags
, nodes
,
1773 &bo_va
->last_pt_update
);
1778 if (trace_amdgpu_vm_bo_mapping_enabled()) {
1779 list_for_each_entry(mapping
, &bo_va
->valids
, list
)
1780 trace_amdgpu_vm_bo_mapping(mapping
);
1782 list_for_each_entry(mapping
, &bo_va
->invalids
, list
)
1783 trace_amdgpu_vm_bo_mapping(mapping
);
1786 spin_lock(&vm
->status_lock
);
1787 list_splice_init(&bo_va
->invalids
, &bo_va
->valids
);
1788 list_del_init(&bo_va
->base
.vm_status
);
1790 list_add(&bo_va
->base
.vm_status
, &vm
->cleared
);
1791 spin_unlock(&vm
->status_lock
);
1793 if (vm
->use_cpu_for_update
) {
1796 amdgpu_gart_flush_gpu_tlb(adev
, 0);
1803 * amdgpu_vm_update_prt_state - update the global PRT state
1805 static void amdgpu_vm_update_prt_state(struct amdgpu_device
*adev
)
1807 unsigned long flags
;
1810 spin_lock_irqsave(&adev
->vm_manager
.prt_lock
, flags
);
1811 enable
= !!atomic_read(&adev
->vm_manager
.num_prt_users
);
1812 adev
->gart
.gart_funcs
->set_prt(adev
, enable
);
1813 spin_unlock_irqrestore(&adev
->vm_manager
.prt_lock
, flags
);
1817 * amdgpu_vm_prt_get - add a PRT user
1819 static void amdgpu_vm_prt_get(struct amdgpu_device
*adev
)
1821 if (!adev
->gart
.gart_funcs
->set_prt
)
1824 if (atomic_inc_return(&adev
->vm_manager
.num_prt_users
) == 1)
1825 amdgpu_vm_update_prt_state(adev
);
1829 * amdgpu_vm_prt_put - drop a PRT user
1831 static void amdgpu_vm_prt_put(struct amdgpu_device
*adev
)
1833 if (atomic_dec_return(&adev
->vm_manager
.num_prt_users
) == 0)
1834 amdgpu_vm_update_prt_state(adev
);
1838 * amdgpu_vm_prt_cb - callback for updating the PRT status
1840 static void amdgpu_vm_prt_cb(struct dma_fence
*fence
, struct dma_fence_cb
*_cb
)
1842 struct amdgpu_prt_cb
*cb
= container_of(_cb
, struct amdgpu_prt_cb
, cb
);
1844 amdgpu_vm_prt_put(cb
->adev
);
1849 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1851 static void amdgpu_vm_add_prt_cb(struct amdgpu_device
*adev
,
1852 struct dma_fence
*fence
)
1854 struct amdgpu_prt_cb
*cb
;
1856 if (!adev
->gart
.gart_funcs
->set_prt
)
1859 cb
= kmalloc(sizeof(struct amdgpu_prt_cb
), GFP_KERNEL
);
1861 /* Last resort when we are OOM */
1863 dma_fence_wait(fence
, false);
1865 amdgpu_vm_prt_put(adev
);
1868 if (!fence
|| dma_fence_add_callback(fence
, &cb
->cb
,
1870 amdgpu_vm_prt_cb(fence
, &cb
->cb
);
1875 * amdgpu_vm_free_mapping - free a mapping
1877 * @adev: amdgpu_device pointer
1879 * @mapping: mapping to be freed
1880 * @fence: fence of the unmap operation
1882 * Free a mapping and make sure we decrease the PRT usage count if applicable.
1884 static void amdgpu_vm_free_mapping(struct amdgpu_device
*adev
,
1885 struct amdgpu_vm
*vm
,
1886 struct amdgpu_bo_va_mapping
*mapping
,
1887 struct dma_fence
*fence
)
1889 if (mapping
->flags
& AMDGPU_PTE_PRT
)
1890 amdgpu_vm_add_prt_cb(adev
, fence
);
1895 * amdgpu_vm_prt_fini - finish all prt mappings
1897 * @adev: amdgpu_device pointer
1900 * Register a cleanup callback to disable PRT support after VM dies.
1902 static void amdgpu_vm_prt_fini(struct amdgpu_device
*adev
, struct amdgpu_vm
*vm
)
1904 struct reservation_object
*resv
= vm
->root
.bo
->tbo
.resv
;
1905 struct dma_fence
*excl
, **shared
;
1906 unsigned i
, shared_count
;
1909 r
= reservation_object_get_fences_rcu(resv
, &excl
,
1910 &shared_count
, &shared
);
1912 /* Not enough memory to grab the fence list, as last resort
1913 * block for all the fences to complete.
1915 reservation_object_wait_timeout_rcu(resv
, true, false,
1916 MAX_SCHEDULE_TIMEOUT
);
1920 /* Add a callback for each fence in the reservation object */
1921 amdgpu_vm_prt_get(adev
);
1922 amdgpu_vm_add_prt_cb(adev
, excl
);
1924 for (i
= 0; i
< shared_count
; ++i
) {
1925 amdgpu_vm_prt_get(adev
);
1926 amdgpu_vm_add_prt_cb(adev
, shared
[i
]);
1933 * amdgpu_vm_clear_freed - clear freed BOs in the PT
1935 * @adev: amdgpu_device pointer
1937 * @fence: optional resulting fence (unchanged if no work needed to be done
1938 * or if an error occurred)
1940 * Make sure all freed BOs are cleared in the PT.
1941 * Returns 0 for success.
1943 * PTs have to be reserved and mutex must be locked!
1945 int amdgpu_vm_clear_freed(struct amdgpu_device
*adev
,
1946 struct amdgpu_vm
*vm
,
1947 struct dma_fence
**fence
)
1949 struct amdgpu_bo_va_mapping
*mapping
;
1950 struct dma_fence
*f
= NULL
;
1952 uint64_t init_pte_value
= 0;
1954 while (!list_empty(&vm
->freed
)) {
1955 mapping
= list_first_entry(&vm
->freed
,
1956 struct amdgpu_bo_va_mapping
, list
);
1957 list_del(&mapping
->list
);
1959 if (vm
->pte_support_ats
)
1960 init_pte_value
= AMDGPU_PTE_SYSTEM
;
1962 r
= amdgpu_vm_bo_update_mapping(adev
, NULL
, 0, NULL
, vm
,
1963 mapping
->start
, mapping
->last
,
1964 init_pte_value
, 0, &f
);
1965 amdgpu_vm_free_mapping(adev
, vm
, mapping
, f
);
1973 dma_fence_put(*fence
);
1984 * amdgpu_vm_clear_moved - clear moved BOs in the PT
1986 * @adev: amdgpu_device pointer
1989 * Make sure all moved BOs are cleared in the PT.
1990 * Returns 0 for success.
1992 * PTs have to be reserved and mutex must be locked!
1994 int amdgpu_vm_clear_moved(struct amdgpu_device
*adev
, struct amdgpu_vm
*vm
,
1995 struct amdgpu_sync
*sync
)
1997 struct amdgpu_bo_va
*bo_va
= NULL
;
2000 spin_lock(&vm
->status_lock
);
2001 while (!list_empty(&vm
->moved
)) {
2002 bo_va
= list_first_entry(&vm
->moved
,
2003 struct amdgpu_bo_va
, base
.vm_status
);
2004 spin_unlock(&vm
->status_lock
);
2006 r
= amdgpu_vm_bo_update(adev
, bo_va
, true);
2010 spin_lock(&vm
->status_lock
);
2012 spin_unlock(&vm
->status_lock
);
2015 r
= amdgpu_sync_fence(adev
, sync
, bo_va
->last_pt_update
);
2021 * amdgpu_vm_bo_add - add a bo to a specific vm
2023 * @adev: amdgpu_device pointer
2025 * @bo: amdgpu buffer object
2027 * Add @bo into the requested vm.
2028 * Add @bo to the list of bos associated with the vm
2029 * Returns newly added bo_va or NULL for failure
2031 * Object has to be reserved!
2033 struct amdgpu_bo_va
*amdgpu_vm_bo_add(struct amdgpu_device
*adev
,
2034 struct amdgpu_vm
*vm
,
2035 struct amdgpu_bo
*bo
)
2037 struct amdgpu_bo_va
*bo_va
;
2039 bo_va
= kzalloc(sizeof(struct amdgpu_bo_va
), GFP_KERNEL
);
2040 if (bo_va
== NULL
) {
2043 bo_va
->base
.vm
= vm
;
2044 bo_va
->base
.bo
= bo
;
2045 INIT_LIST_HEAD(&bo_va
->base
.bo_list
);
2046 INIT_LIST_HEAD(&bo_va
->base
.vm_status
);
2048 bo_va
->ref_count
= 1;
2049 INIT_LIST_HEAD(&bo_va
->valids
);
2050 INIT_LIST_HEAD(&bo_va
->invalids
);
2053 list_add_tail(&bo_va
->base
.bo_list
, &bo
->va
);
2059 * amdgpu_vm_bo_map - map bo inside a vm
2061 * @adev: amdgpu_device pointer
2062 * @bo_va: bo_va to store the address
2063 * @saddr: where to map the BO
2064 * @offset: requested offset in the BO
2065 * @flags: attributes of pages (read/write/valid/etc.)
 * Add a mapping of the BO at the specified addr into the VM.
2068 * Returns 0 for success, error for failure.
2070 * Object has to be reserved and unreserved outside!
2072 int amdgpu_vm_bo_map(struct amdgpu_device
*adev
,
2073 struct amdgpu_bo_va
*bo_va
,
2074 uint64_t saddr
, uint64_t offset
,
2075 uint64_t size
, uint64_t flags
)
2077 struct amdgpu_bo_va_mapping
*mapping
, *tmp
;
2078 struct amdgpu_bo
*bo
= bo_va
->base
.bo
;
2079 struct amdgpu_vm
*vm
= bo_va
->base
.vm
;
2082 /* validate the parameters */
2083 if (saddr
& AMDGPU_GPU_PAGE_MASK
|| offset
& AMDGPU_GPU_PAGE_MASK
||
2084 size
== 0 || size
& AMDGPU_GPU_PAGE_MASK
)
2087 /* make sure object fit at this offset */
2088 eaddr
= saddr
+ size
- 1;
2089 if (saddr
>= eaddr
||
2090 (bo
&& offset
+ size
> amdgpu_bo_size(bo
)))
2093 saddr
/= AMDGPU_GPU_PAGE_SIZE
;
2094 eaddr
/= AMDGPU_GPU_PAGE_SIZE
;
2096 tmp
= amdgpu_vm_it_iter_first(&vm
->va
, saddr
, eaddr
);
2098 /* bo and tmp overlap, invalid addr */
2099 dev_err(adev
->dev
, "bo %p va 0x%010Lx-0x%010Lx conflict with "
2100 "0x%010Lx-0x%010Lx\n", bo
, saddr
, eaddr
,
2101 tmp
->start
, tmp
->last
+ 1);
2105 mapping
= kmalloc(sizeof(*mapping
), GFP_KERNEL
);
2109 INIT_LIST_HEAD(&mapping
->list
);
2110 mapping
->start
= saddr
;
2111 mapping
->last
= eaddr
;
2112 mapping
->offset
= offset
;
2113 mapping
->flags
= flags
;
2115 list_add(&mapping
->list
, &bo_va
->invalids
);
2116 amdgpu_vm_it_insert(mapping
, &vm
->va
);
2118 if (flags
& AMDGPU_PTE_PRT
)
2119 amdgpu_vm_prt_get(adev
);
2125 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
2127 * @adev: amdgpu_device pointer
2128 * @bo_va: bo_va to store the address
2129 * @saddr: where to map the BO
2130 * @offset: requested offset in the BO
2131 * @flags: attributes of pages (read/write/valid/etc.)
 * Add a mapping of the BO at the specified addr into the VM. Replace existing
2134 * mappings as we do so.
2135 * Returns 0 for success, error for failure.
2137 * Object has to be reserved and unreserved outside!
2139 int amdgpu_vm_bo_replace_map(struct amdgpu_device
*adev
,
2140 struct amdgpu_bo_va
*bo_va
,
2141 uint64_t saddr
, uint64_t offset
,
2142 uint64_t size
, uint64_t flags
)
2144 struct amdgpu_bo_va_mapping
*mapping
;
2145 struct amdgpu_bo
*bo
= bo_va
->base
.bo
;
2146 struct amdgpu_vm
*vm
= bo_va
->base
.vm
;
2150 /* validate the parameters */
2151 if (saddr
& AMDGPU_GPU_PAGE_MASK
|| offset
& AMDGPU_GPU_PAGE_MASK
||
2152 size
== 0 || size
& AMDGPU_GPU_PAGE_MASK
)
2155 /* make sure object fit at this offset */
2156 eaddr
= saddr
+ size
- 1;
2157 if (saddr
>= eaddr
||
2158 (bo
&& offset
+ size
> amdgpu_bo_size(bo
)))
2161 /* Allocate all the needed memory */
2162 mapping
= kmalloc(sizeof(*mapping
), GFP_KERNEL
);
2166 r
= amdgpu_vm_bo_clear_mappings(adev
, bo_va
->base
.vm
, saddr
, size
);
2172 saddr
/= AMDGPU_GPU_PAGE_SIZE
;
2173 eaddr
/= AMDGPU_GPU_PAGE_SIZE
;
2175 mapping
->start
= saddr
;
2176 mapping
->last
= eaddr
;
2177 mapping
->offset
= offset
;
2178 mapping
->flags
= flags
;
2180 list_add(&mapping
->list
, &bo_va
->invalids
);
2181 amdgpu_vm_it_insert(mapping
, &vm
->va
);
2183 if (flags
& AMDGPU_PTE_PRT
)
2184 amdgpu_vm_prt_get(adev
);
2190 * amdgpu_vm_bo_unmap - remove bo mapping from vm
2192 * @adev: amdgpu_device pointer
2193 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 * Remove a mapping of the BO at the specified addr from the VM.
2197 * Returns 0 for success, error for failure.
2199 * Object has to be reserved and unreserved outside!
2201 int amdgpu_vm_bo_unmap(struct amdgpu_device
*adev
,
2202 struct amdgpu_bo_va
*bo_va
,
2205 struct amdgpu_bo_va_mapping
*mapping
;
2206 struct amdgpu_vm
*vm
= bo_va
->base
.vm
;
2209 saddr
/= AMDGPU_GPU_PAGE_SIZE
;
2211 list_for_each_entry(mapping
, &bo_va
->valids
, list
) {
2212 if (mapping
->start
== saddr
)
2216 if (&mapping
->list
== &bo_va
->valids
) {
2219 list_for_each_entry(mapping
, &bo_va
->invalids
, list
) {
2220 if (mapping
->start
== saddr
)
2224 if (&mapping
->list
== &bo_va
->invalids
)
2228 list_del(&mapping
->list
);
2229 amdgpu_vm_it_remove(mapping
, &vm
->va
);
2230 trace_amdgpu_vm_bo_unmap(bo_va
, mapping
);
2233 list_add(&mapping
->list
, &vm
->freed
);
2235 amdgpu_vm_free_mapping(adev
, vm
, mapping
,
2236 bo_va
->last_pt_update
);
2242 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2244 * @adev: amdgpu_device pointer
2245 * @vm: VM structure to use
2246 * @saddr: start of the range
2247 * @size: size of the range
2249 * Remove all mappings in a range, split them as appropriate.
2250 * Returns 0 for success, error for failure.
int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
				struct amdgpu_vm *vm,
				uint64_t saddr, uint64_t size)
{
	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
	LIST_HEAD(removed);
	uint64_t eaddr;

	eaddr = saddr + size - 1;
	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	/* Allocate all the needed memory */
	before = kzalloc(sizeof(*before), GFP_KERNEL);
	if (!before)
		return -ENOMEM;
	INIT_LIST_HEAD(&before->list);

	after = kzalloc(sizeof(*after), GFP_KERNEL);
	if (!after) {
		kfree(before);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&after->list);

	/* Now gather all removed mappings */
	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
	while (tmp) {
		/* Remember mapping split at the start */
		if (tmp->start < saddr) {
			before->start = tmp->start;
			before->last = saddr - 1;
			before->offset = tmp->offset;
			before->flags = tmp->flags;
			list_add(&before->list, &tmp->list);
		}

		/* Remember mapping split at the end */
		if (tmp->last > eaddr) {
			after->start = eaddr + 1;
			after->last = tmp->last;
			after->offset = tmp->offset;
			after->offset += after->start - tmp->start;
			after->flags = tmp->flags;
			list_add(&after->list, &tmp->list);
		}

		list_del(&tmp->list);
		list_add(&tmp->list, &removed);

		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
	}

	/* And free them up */
	list_for_each_entry_safe(tmp, next, &removed, list) {
		amdgpu_vm_it_remove(tmp, &vm->va);
		list_del(&tmp->list);

		if (tmp->start < saddr)
			tmp->start = saddr;
		if (tmp->last > eaddr)
			tmp->last = eaddr;

		list_add(&tmp->list, &vm->freed);
		trace_amdgpu_vm_bo_unmap(NULL, tmp);
	}

	/* Insert partial mapping before the range */
	if (!list_empty(&before->list)) {
		amdgpu_vm_it_insert(before, &vm->va);
		if (before->flags & AMDGPU_PTE_PRT)
			amdgpu_vm_prt_get(adev);
	} else {
		kfree(before);
	}

	/* Insert partial mapping after the range */
	if (!list_empty(&after->list)) {
		amdgpu_vm_it_insert(after, &vm->va);
		if (after->flags & AMDGPU_PTE_PRT)
			amdgpu_vm_prt_get(adev);
	} else {
		kfree(after);
	}

	return 0;
}
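/*
 * The cleared portion of every overlapping mapping ends up on vm->freed so
 * that its PTEs are invalidated on the next VM update, while any remnants in
 * front of or behind the range ("before"/"after") are re-inserted into the
 * interval tree as new, smaller mappings.
 */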
/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->base.vm;

	list_del(&bo_va->base.bo_list);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->base.vm_status);
	spin_unlock(&vm->status_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		list_add(&mapping->list, &vm->freed);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		amdgpu_vm_free_mapping(adev, vm, mapping,
				       bo_va->last_pt_update);
	}

	dma_fence_put(bo_va->last_pt_update);
	kfree(bo_va);
}
/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo)
{
	struct amdgpu_vm_bo_base *bo_base;

	list_for_each_entry(bo_base, &bo->va, bo_list) {
		spin_lock(&bo_base->vm->status_lock);
		if (list_empty(&bo_base->vm_status))
			list_add(&bo_base->vm_status,
				 &bo_base->vm->moved);
		spin_unlock(&bo_base->vm->status_lock);
	}
}
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
	/* Total bits covered by PD + PTs */
	unsigned bits = ilog2(vm_size) + 18;

	/* Make sure the PD is 4K in size up to 8GB address space.
	 * Above that, split equally between PD and PTs.
	 */
	if (vm_size <= 8)
		return (bits - 9);
	else
		return ((bits + 3) / 2);
}
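/*
 * Example, derived from the formula above: a 256 GB VM gives
 * bits = ilog2(256) + 18 = 26; that is above the 8 GB threshold, so the
 * returned block size is (26 + 3) / 2 = 14 bits.
 */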
/**
 * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
 *
 * @adev: amdgpu_device pointer
 * @fragment_size_default: the default fragment size used when the module
 * parameter is set to auto (-1)
 */
void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
				 uint32_t fragment_size_default)
{
	if (amdgpu_vm_fragment_size == -1)
		adev->vm_manager.fragment_size = fragment_size_default;
	else
		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
}
/**
 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
 *
 * @adev: amdgpu_device pointer
 * @vm_size: the default vm size if it's set auto
 * @fragment_size_default: the default fragment size if it's set auto
 */
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
			   uint32_t fragment_size_default)
{
	/* adjust vm size first */
	if (amdgpu_vm_size == -1)
		adev->vm_manager.vm_size = vm_size;
	else
		adev->vm_manager.vm_size = amdgpu_vm_size;

	/* block size depends on vm size */
	if (amdgpu_vm_block_size == -1)
		adev->vm_manager.block_size =
			amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
	else
		adev->vm_manager.block_size = amdgpu_vm_block_size;

	amdgpu_vm_set_fragment_size(adev, fragment_size_default);

	DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
		 adev->vm_manager.vm_size, adev->vm_manager.block_size,
		 adev->vm_manager.fragment_size);
}
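/*
 * The amdgpu_vm_size, amdgpu_vm_block_size and amdgpu_vm_fragment_size module
 * parameters take precedence here; leaving them at -1 (auto) means the values
 * are derived as shown above.
 */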
/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @vm_context: indicates whether it is a GFX or Compute context
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		   int vm_context)
{
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
				   AMDGPU_VM_PTE_COUNT(adev) * 8);
	unsigned ring_instance;
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	int r, i;
	u64 flags;
	uint64_t init_pde_value = 0;

	vm->va = RB_ROOT;
	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		vm->reserved_vmid[i] = NULL;
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->moved);
	INIT_LIST_HEAD(&vm->cleared);
	INIT_LIST_HEAD(&vm->freed);

	/* create scheduler entity for page table updates */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
	r = amd_sched_entity_init(&ring->sched, &vm->entity,
				  rq, amdgpu_sched_jobs);
	if (r)
		return r;

	vm->pte_support_ats = false;

	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_COMPUTE);

		if (adev->asic_type == CHIP_RAVEN) {
			vm->pte_support_ats = true;
			init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
		}
	} else {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_GFX);
	}
	DRM_DEBUG_DRIVER("VM update mode is %s\n",
			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
		  "CPU update of VM recommended only for large BAR system\n");
	vm->last_dir_update = NULL;

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_VRAM_CLEARED;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			  AMDGPU_GEM_CREATE_SHADOW);

	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
			     AMDGPU_GEM_DOMAIN_VRAM, flags,
			     NULL, NULL, init_pde_value, &vm->root.bo);
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(vm->root.bo, false);
	if (r)
		goto error_free_root;

	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);

	if (vm->use_cpu_for_update) {
		r = amdgpu_bo_kmap(vm->root.bo, NULL);
		if (r)
			goto error_free_root;
	}

	amdgpu_bo_unreserve(vm->root.bo);

	return 0;

error_free_root:
	amdgpu_bo_unref(&vm->root.bo->shadow);
	amdgpu_bo_unref(&vm->root.bo);
	vm->root.bo = NULL;

error_free_sched_entity:
	amd_sched_entity_fini(&ring->sched, &vm->entity);

	return r;
}
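/*
 * Page table updates for this VM are done either by the CPU, which requires
 * the root PD to be CPU accessible and kmapped as above, or by the SDMA
 * engine through the scheduler entity initialized here; the choice comes from
 * adev->vm_manager.vm_update_mode.
 */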
/**
 * amdgpu_vm_free_levels - free PD/PT levels
 *
 * @level: PD/PT starting level to free
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
{
	unsigned i;

	if (level->bo) {
		amdgpu_bo_unref(&level->bo->shadow);
		amdgpu_bo_unref(&level->bo);
	}

	if (level->entries)
		for (i = 0; i <= level->last_entry_used; i++)
			amdgpu_vm_free_levels(&level->entries[i]);

	kvfree(level->entries);
}
/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Unbind the VM and remove all bos from the vm bo list
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
	int i;

	amd_sched_entity_fini(vm->entity.sched, &vm->entity);

	if (!RB_EMPTY_ROOT(&vm->va))
		dev_err(adev->dev, "still active bo inside vm\n");

	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
			amdgpu_vm_prt_fini(adev, vm);
			prt_fini_needed = false;
		}

		list_del(&mapping->list);
		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
	}

	amdgpu_vm_free_levels(&vm->root);
	dma_fence_put(vm->last_dir_update);
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		amdgpu_vm_free_reserved_vmid(adev, vm, i);
}
/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vm_id_manager *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_init(&id_mgr->lock);
		INIT_LIST_HEAD(&id_mgr->ids_lru);
		atomic_set(&id_mgr->reserved_vmid_num, 0);

		/* skip over VMID 0, since it is the system VM */
		for (j = 1; j < id_mgr->num_ids; ++j) {
			amdgpu_vm_reset_id(adev, i, j);
			amdgpu_sync_create(&id_mgr->ids[j].active);
			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
		}
	}

	adev->vm_manager.fence_context =
		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		adev->vm_manager.seqno[i] = 0;

	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
	atomic64_set(&adev->vm_manager.client_counter, 0);
	spin_lock_init(&adev->vm_manager.prt_lock);
	atomic_set(&adev->vm_manager.num_prt_users, 0);

	/* If not overridden by the user, compute VM tables are updated by the
	 * CPU by default, but only on large BAR systems.
	 */
#ifdef CONFIG_X86_64
	if (amdgpu_vm_update_mode == -1) {
		if (amdgpu_vm_is_large_bar(adev))
			adev->vm_manager.vm_update_mode =
				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
		else
			adev->vm_manager.vm_update_mode = 0;
	} else {
		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
	}
#else
	adev->vm_manager.vm_update_mode = 0;
#endif
}
/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vm_id_manager *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_destroy(&id_mgr->lock);
		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
			struct amdgpu_vm_id *id = &id_mgr->ids[j];

			amdgpu_sync_free(&id->active);
			dma_fence_put(id->flushed_updates);
			dma_fence_put(id->last_flush);
		}
	}
}
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	union drm_amdgpu_vm *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	int r;

	switch (args->in.op) {
	case AMDGPU_VM_OP_RESERVE_VMID:
		/* currently we only need to reserve a vmid from the gfxhub */
		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
						  AMDGPU_GFXHUB);
		if (r)
			return r;
		break;
	case AMDGPU_VM_OP_UNRESERVE_VMID:
		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);