]>
Commit | Line | Data |
---|---|---|
771fe6b9 JG |
1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. | |
3 | * Copyright 2008 Red Hat Inc. | |
4 | * Copyright 2009 Jerome Glisse. | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a | |
7 | * copy of this software and associated documentation files (the "Software"), | |
8 | * to deal in the Software without restriction, including without limitation | |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
10 | * and/or sell copies of the Software, and to permit persons to whom the | |
11 | * Software is furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
22 | * OTHER DEALINGS IN THE SOFTWARE. | |
23 | * | |
24 | * Authors: Dave Airlie | |
25 | * Alex Deucher | |
26 | * Jerome Glisse | |
27 | */ | |
760285e7 DH |
28 | #include <drm/drmP.h> |
29 | #include <drm/radeon_drm.h> | |
771fe6b9 JG |
30 | #include "radeon.h" |
31 | #include "radeon_reg.h" | |
32 | ||
03eec93b AD |
33 | /* |
34 | * GART | |
35 | * The GART (Graphics Aperture Remapping Table) is an aperture | |
36 | * in the GPU's address space. System pages can be mapped into | |
37 | * the aperture and look like contiguous pages from the GPU's | |
38 | * perspective. A page table maps the pages in the aperture | |
39 | * to the actual backing pages in system memory. | |
40 | * | |
41 | * Radeon GPUs support both an internal GART, as described above, | |
42 | * and AGP. AGP works similarly, but the GART table is configured | |
43 | * and maintained by the northbridge rather than the driver. | |
44 | * Radeon hw has a separate AGP aperture that is programmed to | |
45 | * point to the AGP aperture provided by the northbridge and the | |
46 | * requests are passed through to the northbridge aperture. | |
47 | * Both AGP and internal GART can be used at the same time, however | |
48 | * that is not currently supported by the driver. | |
49 | * | |
50 | * This file handles the common internal GART management. | |
51 | */ | |
52 | ||
771fe6b9 JG |
53 | /* |
54 | * Common GART table functions. | |
55 | */ | |
03eec93b AD |
56 | /** |
57 | * radeon_gart_table_ram_alloc - allocate system ram for gart page table | |
58 | * | |
59 | * @rdev: radeon_device pointer | |
60 | * | |
61 | * Allocate system memory for GART page table | |
62 | * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the | |
63 | * gart table to be in system memory. | |
64 | * Returns 0 for success, -ENOMEM for failure. | |
65 | */ | |
771fe6b9 JG |
66 | int radeon_gart_table_ram_alloc(struct radeon_device *rdev) |
67 | { | |
68 | void *ptr; | |
69 | ||
70 | ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size, | |
71 | &rdev->gart.table_addr); | |
72 | if (ptr == NULL) { | |
73 | return -ENOMEM; | |
74 | } | |
75 | #ifdef CONFIG_X86 | |
76 | if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 || | |
77 | rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) { | |
78 | set_memory_uc((unsigned long)ptr, | |
79 | rdev->gart.table_size >> PAGE_SHIFT); | |
80 | } | |
81 | #endif | |
c9a1be96 JG |
82 | rdev->gart.ptr = ptr; |
83 | memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size); | |
771fe6b9 JG |
84 | return 0; |
85 | } | |
86 | ||
03eec93b AD |
87 | /** |
88 | * radeon_gart_table_ram_free - free system ram for gart page table | |
89 | * | |
90 | * @rdev: radeon_device pointer | |
91 | * | |
92 | * Free system memory for GART page table | |
93 | * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the | |
94 | * gart table to be in system memory. | |
95 | */ | |
771fe6b9 JG |
96 | void radeon_gart_table_ram_free(struct radeon_device *rdev) |
97 | { | |
c9a1be96 | 98 | if (rdev->gart.ptr == NULL) { |
771fe6b9 JG |
99 | return; |
100 | } | |
101 | #ifdef CONFIG_X86 | |
102 | if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 || | |
103 | rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) { | |
c9a1be96 | 104 | set_memory_wb((unsigned long)rdev->gart.ptr, |
771fe6b9 JG |
105 | rdev->gart.table_size >> PAGE_SHIFT); |
106 | } | |
107 | #endif | |
108 | pci_free_consistent(rdev->pdev, rdev->gart.table_size, | |
c9a1be96 | 109 | (void *)rdev->gart.ptr, |
771fe6b9 | 110 | rdev->gart.table_addr); |
c9a1be96 | 111 | rdev->gart.ptr = NULL; |
771fe6b9 JG |
112 | rdev->gart.table_addr = 0; |
113 | } | |
114 | ||
03eec93b AD |
115 | /** |
116 | * radeon_gart_table_vram_alloc - allocate vram for gart page table | |
117 | * | |
118 | * @rdev: radeon_device pointer | |
119 | * | |
120 | * Allocate video memory for GART page table | |
121 | * (pcie r4xx, r5xx+). These asics require the | |
122 | * gart table to be in video memory. | |
123 | * Returns 0 for success, error for failure. | |
124 | */ | |
771fe6b9 JG |
125 | int radeon_gart_table_vram_alloc(struct radeon_device *rdev) |
126 | { | |
771fe6b9 JG |
127 | int r; |
128 | ||
c9a1be96 | 129 | if (rdev->gart.robj == NULL) { |
441921d5 | 130 | r = radeon_bo_create(rdev, rdev->gart.table_size, |
268b2510 | 131 | PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, |
40f5cf99 | 132 | NULL, &rdev->gart.robj); |
771fe6b9 JG |
133 | if (r) { |
134 | return r; | |
135 | } | |
136 | } | |
4aac0473 JG |
137 | return 0; |
138 | } | |
139 | ||
/**
 * radeon_gart_table_vram_pin - pin gart page table in vram
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in vram so it will not be moved
 * by the memory manager (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int radeon_gart_table_vram_pin(struct radeon_device *rdev)
{
	uint64_t gpu_addr;
	int r;

	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	/* pin in VRAM and record the GPU address of the table */
	r = radeon_bo_pin(rdev->gart.robj,
			  RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->gart.robj);
		return r;
	}
	/* kmap so the CPU can write page table entries */
	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
	if (r)
		radeon_bo_unpin(rdev->gart.robj);
	radeon_bo_unreserve(rdev->gart.robj);
	/* NOTE(review): table_addr is written even when kmap failed and the
	 * BO was unpinned above; presumably callers treat r != 0 as fatal
	 * and never consume table_addr on that path — confirm. */
	rdev->gart.table_addr = gpu_addr;
	return r;
}
171 | ||
03eec93b AD |
172 | /** |
173 | * radeon_gart_table_vram_unpin - unpin gart page table in vram | |
174 | * | |
175 | * @rdev: radeon_device pointer | |
176 | * | |
177 | * Unpin the GART page table in vram (pcie r4xx, r5xx+). | |
178 | * These asics require the gart table to be in video memory. | |
179 | */ | |
c9a1be96 | 180 | void radeon_gart_table_vram_unpin(struct radeon_device *rdev) |
771fe6b9 | 181 | { |
4c788679 JG |
182 | int r; |
183 | ||
c9a1be96 | 184 | if (rdev->gart.robj == NULL) { |
771fe6b9 JG |
185 | return; |
186 | } | |
c9a1be96 | 187 | r = radeon_bo_reserve(rdev->gart.robj, false); |
4c788679 | 188 | if (likely(r == 0)) { |
c9a1be96 JG |
189 | radeon_bo_kunmap(rdev->gart.robj); |
190 | radeon_bo_unpin(rdev->gart.robj); | |
191 | radeon_bo_unreserve(rdev->gart.robj); | |
192 | rdev->gart.ptr = NULL; | |
193 | } | |
194 | } | |
195 | ||
03eec93b AD |
196 | /** |
197 | * radeon_gart_table_vram_free - free gart page table vram | |
198 | * | |
199 | * @rdev: radeon_device pointer | |
200 | * | |
201 | * Free the video memory used for the GART page table | |
202 | * (pcie r4xx, r5xx+). These asics require the gart table to | |
203 | * be in video memory. | |
204 | */ | |
c9a1be96 JG |
205 | void radeon_gart_table_vram_free(struct radeon_device *rdev) |
206 | { | |
207 | if (rdev->gart.robj == NULL) { | |
208 | return; | |
4c788679 | 209 | } |
c9a1be96 JG |
210 | radeon_gart_table_vram_unpin(rdev); |
211 | radeon_bo_unref(&rdev->gart.robj); | |
771fe6b9 JG |
212 | } |
213 | ||
771fe6b9 JG |
214 | /* |
215 | * Common gart functions. | |
216 | */ | |
/**
 * radeon_gart_unbind - unbind pages from the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of CPU pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;

	if (!rdev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}
	/* t: first GPU page index; p: first CPU page index.  One CPU page
	 * spans PAGE_SIZE / RADEON_GPU_PAGE_SIZE GPU pages. */
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	for (i = 0; i < pages; i++, p++) {
		if (rdev->gart.pages[p]) {
			rdev->gart.pages[p] = NULL;
			/* redirect the slot to the dummy page so stray GPU
			 * accesses hit harmless memory */
			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				/* gart.ptr may be NULL when the vram table is
				 * not kmapped; radeon_gart_restore() rewrites
				 * the entries from pages_addr later. */
				if (rdev->gart.ptr) {
					radeon_gart_set_page(rdev, t, page_base);
				}
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	/* ensure table writes land before flushing the GPU TLB */
	mb();
	radeon_gart_tlb_flush(rdev);
}
257 | ||
/**
 * radeon_gart_bind - bind pages into the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of CPU pages to bind
 * @pagelist: pages to bind
 * @dma_addr: DMA addresses of pages
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 * Returns 0 for success, -EINVAL for failure.
 */
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
{
	unsigned t;
	unsigned p;
	uint64_t page_base;
	int i, j;

	if (!rdev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}
	/* t: first GPU page index; p: first CPU page index */
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);

	for (i = 0; i < pages; i++, p++) {
		/* always record the backing page + DMA address ... */
		rdev->gart.pages_addr[p] = dma_addr[i];
		rdev->gart.pages[p] = pagelist[i];
		/* ... but only write hw entries if the table is CPU-mapped;
		 * otherwise radeon_gart_restore() rebuilds them on pin. */
		if (rdev->gart.ptr) {
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				radeon_gart_set_page(rdev, t, page_base);
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	/* ensure table writes land before flushing the GPU TLB */
	mb();
	radeon_gart_tlb_flush(rdev);
	return 0;
}
301 | ||
03eec93b AD |
302 | /** |
303 | * radeon_gart_restore - bind all pages in the gart page table | |
304 | * | |
305 | * @rdev: radeon_device pointer | |
306 | * | |
307 | * Binds all pages in the gart page table (all asics). | |
308 | * Used to rebuild the gart table on device startup or resume. | |
309 | */ | |
82568565 DA |
310 | void radeon_gart_restore(struct radeon_device *rdev) |
311 | { | |
312 | int i, j, t; | |
313 | u64 page_base; | |
314 | ||
c9a1be96 JG |
315 | if (!rdev->gart.ptr) { |
316 | return; | |
317 | } | |
82568565 DA |
318 | for (i = 0, t = 0; i < rdev->gart.num_cpu_pages; i++) { |
319 | page_base = rdev->gart.pages_addr[i]; | |
320 | for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { | |
321 | radeon_gart_set_page(rdev, t, page_base); | |
322 | page_base += RADEON_GPU_PAGE_SIZE; | |
323 | } | |
324 | } | |
325 | mb(); | |
326 | radeon_gart_tlb_flush(rdev); | |
327 | } | |
328 | ||
/**
 * radeon_gart_init - init the driver info for managing the gart
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int radeon_gart_init(struct radeon_device *rdev)
{
	int r, i;

	/* already initialized */
	if (rdev->gart.pages) {
		return 0;
	}
	/* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
	if (PAGE_SIZE < RADEON_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = radeon_dummy_page_init(rdev);
	if (r)
		return r;
	/* Compute table size */
	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
	/* Allocate pages table */
	/* NOTE(review): kcalloc() would add an overflow check on the
	 * multiplication here and below. */
	rdev->gart.pages = kzalloc(sizeof(void *) * rdev->gart.num_cpu_pages,
				   GFP_KERNEL);
	if (rdev->gart.pages == NULL) {
		/* fini tolerates the partially-initialized state */
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	rdev->gart.pages_addr = kzalloc(sizeof(dma_addr_t) *
					rdev->gart.num_cpu_pages, GFP_KERNEL);
	if (rdev->gart.pages_addr == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	/* set GART entry to point to the dummy page by default */
	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
	}
	return 0;
}
376 | ||
03eec93b AD |
377 | /** |
378 | * radeon_gart_fini - tear down the driver info for managing the gart | |
379 | * | |
380 | * @rdev: radeon_device pointer | |
381 | * | |
382 | * Tear down the gart driver info and free the dummy page (all asics). | |
383 | */ | |
771fe6b9 JG |
384 | void radeon_gart_fini(struct radeon_device *rdev) |
385 | { | |
386 | if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) { | |
387 | /* unbind pages */ | |
388 | radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages); | |
389 | } | |
390 | rdev->gart.ready = false; | |
391 | kfree(rdev->gart.pages); | |
392 | kfree(rdev->gart.pages_addr); | |
393 | rdev->gart.pages = NULL; | |
394 | rdev->gart.pages_addr = NULL; | |
92656d70 AD |
395 | |
396 | radeon_dummy_page_fini(rdev); | |
771fe6b9 | 397 | } |
721604a1 | 398 | |
09db8644 AD |
399 | /* |
400 | * GPUVM | |
401 | * GPUVM is similar to the legacy gart on older asics, however | |
402 | * rather than there being a single global gart table | |
403 | * for the entire GPU, there are multiple VM page tables active | |
404 | at any given time. The VM page tables can contain a mix | |
405 | of vram pages and system memory pages, and system memory pages | |
406 | * can be mapped as snooped (cached system pages) or unsnooped | |
407 | * (uncached system pages). | |
408 | * Each VM has an ID associated with it and there is a page table | |
409 | associated with each VMID. When executing a command buffer, | |
410 | the kernel tells the ring what VMID to use for that command | |
411 | * buffer. VMIDs are allocated dynamically as commands are submitted. | |
412 | * The userspace drivers maintain their own address space and the kernel | |
413 | * sets up their pages tables accordingly when they submit their | |
414 | * command buffers and a VMID is assigned. | |
415 | * Cayman/Trinity support up to 8 active VMs at any given time; | |
416 | * SI supports 16. | |
417 | */ | |
418 | ||
721604a1 JG |
419 | /* |
420 | * vm helpers | |
421 | * | |
422 | * TODO bind a default page at vm initialization for default address | |
423 | */ | |
c6105f24 | 424 | |
/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+) and restore the page tables of
 * any VMs that were bound before a suspend/reset.
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	struct radeon_vm *vm;
	struct radeon_bo_va *bo_va;
	int r;

	if (!rdev->vm_manager.enabled) {
		/* mark first vm as always in use, it's the system one */
		/* allocate enough for 2 full VM pts */
		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
					      rdev->vm_manager.max_pfn * 8 * 2,
					      RADEON_GEM_DOMAIN_VRAM);
		if (r) {
			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
				(rdev->vm_manager.max_pfn * 8) >> 10);
			return r;
		}

		/* asic-specific setup (cayman+ hw hooks) */
		r = rdev->vm_manager.funcs->init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;

		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
		if (r)
			return r;
	}

	/* restore page table */
	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
		/* skip VMs that were never bound */
		if (vm->id == -1)
			continue;

		list_for_each_entry(bo_va, &vm->va, vm_list) {
			struct ttm_mem_reg *mem = NULL;
			if (bo_va->valid)
				mem = &bo_va->bo->tbo.mem;

			/* force a pte rewrite by clearing valid first */
			bo_va->valid = false;
			r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem);
			if (r) {
				DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
			}
		}

		/* rebind to the same hw VMID it had before */
		r = rdev->vm_manager.funcs->bind(rdev, vm, vm->id);
		if (r) {
			DRM_ERROR("Failed to bind vm %d!\n", vm->id);
		}
	}
	return 0;
}
486 | ||
/* global mutex must be locked */
/**
 * radeon_vm_unbind_locked - unbind a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm to unbind
 *
 * Unbind the requested vm (cayman+).
 * Wait for use of the VM to finish, then unbind the page table,
 * free the page table memory and release the hw VMID.
 * Caller must hold rdev->vm_manager.lock.
 */
static void radeon_vm_unbind_locked(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;

	/* not bound, nothing to do */
	if (vm->id == -1) {
		return;
	}

	/* wait for vm use to end */
	while (vm->fence) {
		int r;
		r = radeon_fence_wait(vm->fence, false);
		if (r)
			DRM_ERROR("error while waiting for fence: %d\n", r);
		if (r == -EDEADLK) {
			/* deadlock means the GPU is hung: drop the global
			 * lock, reset, retake it and retry the wait */
			mutex_unlock(&rdev->vm_manager.lock);
			r = radeon_gpu_reset(rdev);
			mutex_lock(&rdev->vm_manager.lock);
			if (!r)
				continue;
		}
		break;
	}
	radeon_fence_unref(&vm->fence);

	/* hw unbind */
	rdev->vm_manager.funcs->unbind(rdev, vm);
	rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
	list_del_init(&vm->list);
	vm->id = -1;
	/* give the page table memory back to the suballocator */
	radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
	vm->pt = NULL;

	/* all mappings need a pte rewrite on the next bind */
	list_for_each_entry(bo_va, &vm->va, vm_list) {
		bo_va->valid = false;
	}
}
536 | ||
/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+): unbind every active vm,
 * shut down the hw hooks, then release the suballocator.
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	struct radeon_vm *vm, *tmp;

	if (!rdev->vm_manager.enabled)
		return;

	mutex_lock(&rdev->vm_manager.lock);
	/* unbind all active vm */
	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
		radeon_vm_unbind_locked(rdev, vm);
	}
	rdev->vm_manager.funcs->fini(rdev);
	mutex_unlock(&rdev->vm_manager.lock);

	/* sa manager teardown happens outside the lock, after all
	 * page table allocations were freed above */
	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
	rdev->vm_manager.enabled = false;
}
563 | ||
/* global mutex must be locked */
/**
 * radeon_vm_unbind - unbind a vm, taking its local mutex
 *
 * @rdev: radeon_device pointer
 * @vm: vm to unbind
 *
 * Wrapper around radeon_vm_unbind_locked() that takes the per-VM
 * mutex (cayman+).  The caller must already hold the global
 * rdev->vm_manager.lock.
 */
void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
{
	mutex_lock(&vm->mutex);
	radeon_vm_unbind_locked(rdev, vm);
	mutex_unlock(&vm->mutex);
}
579 | ||
/* global and local mutex must be locked */
/**
 * radeon_vm_bind - bind a page table to a VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to bind
 *
 * Bind the requested vm (cayman+).
 * Suballocate memory for the page table, allocate a VMID
 * and bind the page table to it, and finally start to populate
 * the page table.  Evicts least-recently-used VMs when either
 * page table memory or VMIDs run out.
 * Returns 0 for success, error for failure.
 */
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_vm *vm_evict;
	unsigned i;
	int id = -1, r;

	if (vm == NULL) {
		return -EINVAL;
	}

	if (vm->id != -1) {
		/* update lru */
		list_del_init(&vm->list);
		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
		return 0;
	}

retry:
	/* 8 bytes per pte, rounded up to a GPU page */
	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
			     RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
			     RADEON_GPU_PAGE_SIZE, false);
	if (r) {
		/* out of pt space: evict the least recently used vm */
		if (list_empty(&rdev->vm_manager.lru_vm)) {
			return r;
		}
		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
		radeon_vm_unbind(rdev, vm_evict);
		goto retry;
	}
	vm->pt = radeon_sa_bo_cpu_addr(vm->sa_bo);
	vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
	memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));

retry_id:
	/* search for free vm */
	for (i = 0; i < rdev->vm_manager.nvm; i++) {
		if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
			id = i;
			break;
		}
	}
	/* evict vm if necessary */
	if (id == -1) {
		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
		radeon_vm_unbind(rdev, vm_evict);
		goto retry_id;
	}

	/* do hw bind */
	r = rdev->vm_manager.funcs->bind(rdev, vm, id);
	if (r) {
		radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
		return r;
	}
	rdev->vm_manager.use_bitmap |= 1 << id;
	vm->id = id;
	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
	/* map the ring temp bo so the new vm can use it right away */
	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
				       &rdev->ring_tmp_bo.bo->tbo.mem);
}
653 | ||
/* object has to be reserved */
/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 * @offset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm, validate
 * the requested offset against existing mappings, and grow the
 * va space if needed.
 * Returns 0 for success, error for failure.
 */
int radeon_vm_bo_add(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_bo *bo,
		     uint64_t offset,
		     uint32_t flags)
{
	struct radeon_bo_va *bo_va, *tmp;
	struct list_head *head;
	uint64_t size = radeon_bo_size(bo), last_offset = 0;
	unsigned last_pfn;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return -ENOMEM;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->soffset = offset;
	bo_va->eoffset = offset + size;
	bo_va->flags = flags;
	bo_va->valid = false;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_list);
	/* make sure object fit at this offset */
	if (bo_va->soffset >= bo_va->eoffset) {
		kfree(bo_va);
		return -EINVAL;
	}

	last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE;
	if (last_pfn > rdev->vm_manager.max_pfn) {
		kfree(bo_va);
		dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
			last_pfn, rdev->vm_manager.max_pfn);
		return -EINVAL;
	}

	mutex_lock(&vm->mutex);
	if (last_pfn > vm->last_pfn) {
		/* release mutex and lock in right order: the global
		 * vm_manager lock must be taken before the per-VM one */
		mutex_unlock(&vm->mutex);
		mutex_lock(&rdev->vm_manager.lock);
		mutex_lock(&vm->mutex);
		/* and check again (someone may have grown it meanwhile) */
		if (last_pfn > vm->last_pfn) {
			/* grow va space 32M by 32M; requires a rebind since
			 * the page table size changes */
			unsigned align = ((32 << 20) >> 12) - 1;
			radeon_vm_unbind_locked(rdev, vm);
			vm->last_pfn = (last_pfn + align) & ~align;
		}
		mutex_unlock(&rdev->vm_manager.lock);
	}
	/* find the insertion point in the sorted va list */
	head = &vm->va;
	last_offset = 0;
	list_for_each_entry(tmp, &vm->va, vm_list) {
		if (bo_va->soffset >= last_offset && bo_va->eoffset < tmp->soffset) {
			/* bo can be added before this one */
			break;
		}
		if (bo_va->soffset >= tmp->soffset && bo_va->soffset < tmp->eoffset) {
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
				bo, (unsigned)bo_va->soffset, tmp->bo,
				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
			kfree(bo_va);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		last_offset = tmp->eoffset;
		head = &tmp->vm_list;
	}
	list_add(&bo_va->vm_list, head);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);
	return 0;
}
745 | ||
09db8644 AD |
746 | /** |
747 | * radeon_vm_get_addr - get the physical address of the page | |
748 | * | |
749 | * @rdev: radeon_device pointer | |
750 | * @mem: ttm mem | |
751 | * @pfn: pfn | |
752 | * | |
753 | * Look up the physical address of the page that the pte resolves | |
754 | * to (cayman+). | |
755 | * Returns the physical address of the page. | |
756 | */ | |
721604a1 JG |
757 | static u64 radeon_vm_get_addr(struct radeon_device *rdev, |
758 | struct ttm_mem_reg *mem, | |
759 | unsigned pfn) | |
760 | { | |
761 | u64 addr = 0; | |
762 | ||
763 | switch (mem->mem_type) { | |
764 | case TTM_PL_VRAM: | |
765 | addr = (mem->start << PAGE_SHIFT); | |
766 | addr += pfn * RADEON_GPU_PAGE_SIZE; | |
767 | addr += rdev->vm_manager.vram_base_offset; | |
768 | break; | |
769 | case TTM_PL_TT: | |
770 | /* offset inside page table */ | |
771 | addr = mem->start << PAGE_SHIFT; | |
772 | addr += pfn * RADEON_GPU_PAGE_SIZE; | |
773 | addr = addr >> PAGE_SHIFT; | |
774 | /* page table offset */ | |
775 | addr = rdev->gart.pages_addr[addr]; | |
776 | /* in case cpu page size != gpu page size*/ | |
777 | addr += (pfn * RADEON_GPU_PAGE_SIZE) & (~PAGE_MASK); | |
778 | break; | |
779 | default: | |
780 | break; | |
781 | } | |
782 | return addr; | |
783 | } | |
784 | ||
/* object has to be reserved & global and local mutex must be locked */
/**
 * radeon_vm_bo_update_pte - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 * @mem: ttm mem, or NULL to invalidate the mapping
 *
 * Fill in the page table entries for @bo (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 */
int radeon_vm_bo_update_pte(struct radeon_device *rdev,
			    struct radeon_vm *vm,
			    struct radeon_bo *bo,
			    struct ttm_mem_reg *mem)
{
	struct radeon_bo_va *bo_va;
	unsigned ngpu_pages, i;
	uint64_t addr = 0, pfn;
	uint32_t flags;

	/* nothing to do if vm isn't bound */
	if (vm->id == -1)
		return 0;

	bo_va = radeon_bo_va(bo, vm);
	if (bo_va == NULL) {
		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
		return -EINVAL;
	}

	/* already mapped and not an invalidation request */
	if (bo_va->valid && mem)
		return 0;

	ngpu_pages = radeon_bo_ngpu_pages(bo);
	/* recompute VALID/SYSTEM from the current placement */
	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	if (mem) {
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
			bo_va->valid = true;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
		}
	}
	pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE;
	flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags);
	for (i = 0, addr = 0; i < ngpu_pages; i++) {
		/* addr stays 0 when invalidating (mem == NULL) */
		if (mem && bo_va->valid) {
			addr = radeon_vm_get_addr(rdev, mem, i);
		}
		rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags);
	}
	rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm);
	return 0;
}
843 | ||
/* object has to be reserved */
/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Remove @bo from the requested vm (cayman+).
 * Wait for outstanding use of the mapping, remove the ptes for
 * @bo in the page table and drop its list entries.
 * Returns 0 for success.
 */
int radeon_vm_bo_rmv(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;
	int r;

	bo_va = radeon_bo_va(bo, vm);
	if (bo_va == NULL)
		return 0;

	/* wait for va use to end */
	while (bo_va->fence) {
		r = radeon_fence_wait(bo_va->fence, false);
		if (r) {
			DRM_ERROR("error while waiting for fence: %d\n", r);
		}
		if (r == -EDEADLK) {
			/* GPU hang: reset and retry the wait */
			r = radeon_gpu_reset(rdev);
			if (!r)
				continue;
		}
		break;
	}
	radeon_fence_unref(&bo_va->fence);

	/* global lock before per-VM lock (same order as elsewhere) */
	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	/* mem == NULL invalidates the ptes */
	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
	mutex_unlock(&rdev->vm_manager.lock);
	list_del(&bo_va->vm_list);
	mutex_unlock(&vm->mutex);
	list_del(&bo_va->bo_list);

	kfree(bo_va);
	return 0;
}
894 | ||
09db8644 AD |
895 | /** |
896 | * radeon_vm_bo_invalidate - mark the bo as invalid | |
897 | * | |
898 | * @rdev: radeon_device pointer | |
899 | * @vm: requested vm | |
900 | * @bo: radeon buffer object | |
901 | * | |
902 | * Mark @bo as invalid (cayman+). | |
903 | */ | |
721604a1 JG |
904 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
905 | struct radeon_bo *bo) | |
906 | { | |
907 | struct radeon_bo_va *bo_va; | |
908 | ||
909 | BUG_ON(!atomic_read(&bo->tbo.reserved)); | |
910 | list_for_each_entry(bo_va, &bo->va, bo_list) { | |
911 | bo_va->valid = false; | |
912 | } | |
913 | } | |
914 | ||
09db8644 AD |
915 | /** |
916 | * radeon_vm_init - initialize a vm instance | |
917 | * | |
918 | * @rdev: radeon_device pointer | |
919 | * @vm: requested vm | |
920 | * | |
921 | * Init @vm (cayman+). | |
922 | * Map the IB pool and any other shared objects into the VM | |
923 | * by default as it's used by all VMs. | |
924 | * Returns 0 for success, error for failure. | |
925 | */ | |
721604a1 JG |
926 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
927 | { | |
928 | int r; | |
929 | ||
930 | vm->id = -1; | |
931 | vm->fence = NULL; | |
932 | mutex_init(&vm->mutex); | |
933 | INIT_LIST_HEAD(&vm->list); | |
934 | INIT_LIST_HEAD(&vm->va); | |
c21b328e AD |
935 | /* SI requires equal sized PTs for all VMs, so always set |
936 | * last_pfn to max_pfn. cayman allows variable sized | |
937 | * pts so we can grow then as needed. Once we switch | |
938 | * to two level pts we can unify this again. | |
939 | */ | |
940 | if (rdev->family >= CHIP_TAHITI) | |
941 | vm->last_pfn = rdev->vm_manager.max_pfn; | |
942 | else | |
943 | vm->last_pfn = 0; | |
721604a1 JG |
944 | /* map the ib pool buffer at 0 in virtual address space, set |
945 | * read only | |
946 | */ | |
c507f7ef | 947 | r = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo, 0, |
721604a1 JG |
948 | RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); |
949 | return r; | |
950 | } | |
951 | ||
/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	/* lock order: vm_manager.lock before the per-vm mutex
	 * (same order as radeon_vm_bo_rmv)
	 */
	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	radeon_vm_unbind_locked(rdev, vm);
	mutex_unlock(&rdev->vm_manager.lock);

	/* remove all bo, at this point none are busy any more because unbind
	 * waited for the last vm fence to signal
	 */
	/* the ib pool bo was mapped by radeon_vm_init(), drop it first */
	r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
	if (!r) {
		bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
		list_del_init(&bo_va->bo_list);
		list_del_init(&bo_va->vm_list);
		radeon_fence_unref(&bo_va->fence);
		radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
		kfree(bo_va);
	}
	/* anything still on the list here is a userspace mapping leak */
	if (!list_empty(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
		list_del_init(&bo_va->vm_list);
		/* on reserve failure the bo_va is leaked rather than
		 * freed while possibly still in use
		 */
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_fence_unref(&bo_va->fence);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	mutex_unlock(&vm->mutex);
}