/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: This is ugly. Right now we are using TTM to allocate VRAM and we
 * pin it in VRAM while it is in use. We likely want to overhaul memory
 * management for nouveau to be more page-like (not necessarily with the
 * system page size, but a bigger page size) at the lowest level, and have
 * some shim layer on top that provides the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
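
/*
 * Each chunk spans 2 MiB of device memory; with the common 4 KiB
 * PAGE_SIZE that is DMEM_CHUNK_NPAGES == 512 pages tracked per chunk by
 * the allocation bitmap below.
 */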

struct nouveau_migrate;

enum nouveau_aper {
	NOUVEAU_APER_VIRT,
	NOUVEAU_APER_VRAM,
	NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
				      enum nouveau_aper dst_aper, u64 dst_addr,
				      enum nouveau_aper src_aper, u64 src_addr);

struct nouveau_dmem_chunk {
	struct list_head list;
	struct nouveau_bo *bo;
	struct nouveau_drm *drm;
	unsigned long pfn_first;
	unsigned long callocated;
	unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
	spinlock_t lock;
};

struct nouveau_dmem_migrate {
	nouveau_migrate_copy_t copy_func;
	struct nouveau_channel *chan;
};

struct nouveau_dmem {
	struct hmm_devmem *devmem;
	struct nouveau_dmem_migrate migrate;
	struct list_head chunk_free;
	struct list_head chunk_full;
	struct list_head chunk_empty;
	struct mutex mutex;
};
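
/*
 * An interpretation of the lists above: chunk_empty holds chunks with no
 * pages handed out (including chunks whose backing buffer object has not
 * been allocated yet), chunk_free holds partially used chunks, and
 * chunk_full is for chunks with every page in use. Within a chunk, the
 * bitmap tracks which of the DMEM_CHUNK_NPAGES page slots are taken and
 * callocated counts them.
 */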

struct nouveau_dmem_fault {
	struct nouveau_drm *drm;
	struct nouveau_fence *fence;
	dma_addr_t *dma;
	unsigned long npages;
};

struct nouveau_migrate {
	struct vm_area_struct *vma;
	struct nouveau_drm *drm;
	struct nouveau_fence *fence;
	unsigned long npages;
	dma_addr_t *dma;
	unsigned long dma_nr;
};

static void
nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
{
	struct nouveau_dmem_chunk *chunk;
	unsigned long idx;

	chunk = (void *)hmm_devmem_page_get_drvdata(page);
	idx = page_to_pfn(page) - chunk->pfn_first;

	/*
	 * FIXME:
	 *
	 * This is really a bad example; we need to overhaul nouveau memory
	 * management to be more page focused and to allow a lighter locking
	 * scheme to be used in the process.
	 */
	spin_lock(&chunk->lock);
	clear_bit(idx, chunk->bitmap);
	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME: when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&chunk->lock);
}
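
/*
 * Note: nouveau_dmem_free() above is the hmm_devmem free() callback; it
 * runs once the last CPU reference to a device page is dropped and only
 * returns the page's slot to the owning chunk's bitmap, so the backing
 * VRAM stays allocated until the whole chunk is reclaimed.
 */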

static void
nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
				  const unsigned long *src_pfns,
				  unsigned long *dst_pfns,
				  unsigned long start,
				  unsigned long end,
				  void *private)
{
	struct nouveau_dmem_fault *fault = private;
	struct nouveau_drm *drm = fault->drm;
	struct device *dev = drm->dev->dev;
	unsigned long addr, i, npages = 0;
	nouveau_migrate_copy_t copy;
	int ret;

	/* First allocate new memory */
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
		struct page *dpage, *spage;

		dst_pfns[i] = 0;
		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = hmm_vma_alloc_locked_page(vma, addr);
		if (!dpage) {
			dst_pfns[i] = MIGRATE_PFN_ERROR;
			continue;
		}

		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
			      MIGRATE_PFN_LOCKED;
		npages++;
	}

	/* Allocate storage for DMA addresses, so we can unmap later. */
	fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL);
	if (!fault->dma)
		goto error;

	/* Copy things over */
	copy = drm->dmem->migrate.copy_func;
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
		struct nouveau_dmem_chunk *chunk;
		struct page *spage, *dpage;
		u64 src_addr, dst_addr;

		dpage = migrate_pfn_to_page(dst_pfns[i]);
		if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
			continue;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
			dst_pfns[i] = MIGRATE_PFN_ERROR;
			__free_page(dpage);
			continue;
		}

		fault->dma[fault->npages] =
			dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE,
					   PCI_DMA_BIDIRECTIONAL,
					   DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(dev, fault->dma[fault->npages])) {
			dst_pfns[i] = MIGRATE_PFN_ERROR;
			__free_page(dpage);
			continue;
		}

		dst_addr = fault->dma[fault->npages++];

		chunk = (void *)hmm_devmem_page_get_drvdata(spage);
		src_addr = page_to_pfn(spage) - chunk->pfn_first;
		src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

		ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr,
			   NOUVEAU_APER_VRAM, src_addr);
		if (ret) {
			dst_pfns[i] = MIGRATE_PFN_ERROR;
			__free_page(dpage);
			continue;
		}
	}

	nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence);

	return;

error:
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
		struct page *page;

		if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
			continue;

		page = migrate_pfn_to_page(dst_pfns[i]);
		dst_pfns[i] = MIGRATE_PFN_ERROR;
		if (page == NULL)
			continue;

		__free_page(page);
	}
}

void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
					 const unsigned long *src_pfns,
					 const unsigned long *dst_pfns,
					 unsigned long start,
					 unsigned long end,
					 void *private)
{
	struct nouveau_dmem_fault *fault = private;
	struct nouveau_drm *drm = fault->drm;

	if (fault->fence) {
		nouveau_fence_wait(fault->fence, true, false);
		nouveau_fence_unref(&fault->fence);
	} else {
		/*
		 * FIXME: wait for the channel to be IDLE before finalizing
		 * the hmem object below (nouveau_migrate_hmem_fini()).
		 */
	}

	while (fault->npages--) {
		dma_unmap_page(drm->dev->dev, fault->dma[fault->npages],
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	}
	kfree(fault->dma);
}

static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
	.alloc_and_copy = nouveau_dmem_fault_alloc_and_copy,
	.finalize_and_map = nouveau_dmem_fault_finalize_and_map,
};
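
/*
 * migrate_vma() callback contract (as of the HMM API this file targets):
 * alloc_and_copy() is called while the migrating PTEs are unmapped, and
 * must allocate destination pages and start the copy; finalize_and_map()
 * is called after the CPU page tables have been updated and is the last
 * chance to wait on the copy and release per-migration resources.
 */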

static vm_fault_t
nouveau_dmem_fault(struct hmm_devmem *devmem,
		   struct vm_area_struct *vma,
		   unsigned long addr,
		   const struct page *page,
		   unsigned int flags,
		   pmd_t *pmdp)
{
	struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
	unsigned long src[1] = {0}, dst[1] = {0};
	struct nouveau_dmem_fault fault = {0};
	int ret;

	/*
	 * FIXME: what we really want is to find some heuristic to migrate
	 * more than just one page on CPU fault. When such a fault happens
	 * it is very likely that more of the surrounding pages will CPU
	 * fault too.
	 */
	fault.drm = nouveau_drm(drm_dev);
	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
			  addr + PAGE_SIZE, src, dst, &fault);
	if (ret)
		return VM_FAULT_SIGBUS;

	if (dst[0] == MIGRATE_PFN_ERROR)
		return VM_FAULT_SIGBUS;

	return 0;
}

static const struct hmm_devmem_ops
nouveau_dmem_devmem_ops = {
	.free = nouveau_dmem_free,
	.fault = nouveau_dmem_fault,
};

static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return -EINVAL;

	mutex_lock(&drm->dmem->mutex);
	chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk == NULL) {
		mutex_unlock(&drm->dmem->mutex);
		return -ENOMEM;
	}

	list_del(&chunk->list);
	mutex_unlock(&drm->dmem->mutex);

	ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
			     TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
			     &chunk->bo);
	if (ret)
		goto out;

	ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
	if (ret) {
		nouveau_bo_ref(NULL, &chunk->bo);
		goto out;
	}

	bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
	spin_lock_init(&chunk->lock);

out:
	mutex_lock(&drm->dmem->mutex);
	if (chunk->bo)
		list_add(&chunk->list, &drm->dmem->chunk_empty);
	else
		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
	mutex_unlock(&drm->dmem->mutex);

	return ret;
}
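
/*
 * Note the out: path above re-queues the chunk on chunk_empty either way,
 * but at the head when it gained a backing BO and at the tail when it did
 * not: nouveau_dmem_chunk_first_free_locked() below only inspects the
 * first entry of chunk_empty, so a freshly backed chunk is found on the
 * caller's retry while unbacked ones sink to the back.
 */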

static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk)
		return chunk;

	chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
					 struct nouveau_dmem_chunk,
					 list);
	if (chunk && chunk->bo)
		return chunk;

	return NULL;
}

static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
			 unsigned long npages,
			 unsigned long *pages)
{
	struct nouveau_dmem_chunk *chunk;
	unsigned long c;
	int ret;

	memset(pages, 0xff, npages * sizeof(*pages));

	mutex_lock(&drm->dmem->mutex);
	for (c = 0; c < npages;) {
		unsigned long i;

		chunk = nouveau_dmem_chunk_first_free_locked(drm);
		if (chunk == NULL) {
			mutex_unlock(&drm->dmem->mutex);
			ret = nouveau_dmem_chunk_alloc(drm);
			if (ret && c == 0)
				return ret;
			/* Re-take the mutex so locking stays balanced. */
			mutex_lock(&drm->dmem->mutex);
			if (ret)
				break;
			continue;
		}

		spin_lock(&chunk->lock);
		i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
		while (i < DMEM_CHUNK_NPAGES && c < npages) {
			pages[c] = chunk->pfn_first + i;
			set_bit(i, chunk->bitmap);
			chunk->callocated++;
			c++;

			i = find_next_zero_bit(chunk->bitmap,
					       DMEM_CHUNK_NPAGES, i);
		}
		spin_unlock(&chunk->lock);
	}
	mutex_unlock(&drm->dmem->mutex);

	return 0;
}

static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
	unsigned long pfns[1];
	struct page *page;
	int ret;

	/* FIXME: stop all the mismatched API ... */
	ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
	if (ret)
		return NULL;

	page = pfn_to_page(pfns[0]);
	get_page(page);
	lock_page(page);
	return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
	unlock_page(page);
	put_page(page);
}

void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
		ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
		/* FIXME: handle pin failure */
		WARN_ON(ret);
	}
	list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
		ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
		/* FIXME: handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
		nouveau_bo_unpin(chunk->bo);
	}
	list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
		nouveau_bo_unpin(chunk->bo);
	}
	mutex_unlock(&drm->dmem->mutex);
}
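
/*
 * Unpinning on suspend lets TTM evict the chunk buffer objects from VRAM
 * while the device is down, and resume re-pins them; note that only the
 * free and full lists are walked, so chunks parked on chunk_empty are
 * assumed to need no handling here.
 */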

void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk, *tmp;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);

	WARN_ON(!list_empty(&drm->dmem->chunk_free));
	WARN_ON(!list_empty(&drm->dmem->chunk_full));

	list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
		if (chunk->bo) {
			nouveau_bo_unpin(chunk->bo);
			nouveau_bo_ref(NULL, &chunk->bo);
		}
		list_del(&chunk->list);
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
}

static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
		    enum nouveau_aper dst_aper, u64 dst_addr,
		    enum nouveau_aper src_aper, u64 src_addr)
{
	struct nouveau_channel *chan = drm->dmem->migrate.chan;
	u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
			 (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
			 (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
	int ret;

	ret = RING_SPACE(chan, 13);
	if (ret)
		return ret;

	if (src_aper != NOUVEAU_APER_VIRT) {
		switch (src_aper) {
		case NOUVEAU_APER_VRAM:
			BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
			break;
		case NOUVEAU_APER_HOST:
			BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
			break;
		default:
			return -EINVAL;
		}
		launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
	}

	if (dst_aper != NOUVEAU_APER_VIRT) {
		switch (dst_aper) {
		case NOUVEAU_APER_VRAM:
			BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
			break;
		case NOUVEAU_APER_HOST:
			BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
			break;
		default:
			return -EINVAL;
		}
		launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
	}

	BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
	OUT_RING (chan, upper_32_bits(src_addr));
	OUT_RING (chan, lower_32_bits(src_addr));
	OUT_RING (chan, upper_32_bits(dst_addr));
	OUT_RING (chan, lower_32_bits(dst_addr));
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, PAGE_SIZE);
	OUT_RING (chan, npages);
	BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
	OUT_RING (chan, launch_dma);
	return 0;
}
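
/*
 * A note on the method stream above (assuming the usual NVC0B5-family
 * copy-engine method layout): the eight values pushed at 0x0400 are the
 * 64-bit source and destination offsets followed by PITCH_IN, PITCH_OUT,
 * LINE_LENGTH_IN and LINE_COUNT. Using PAGE_SIZE for both pitches and
 * the line length with LINE_COUNT = npages thus describes one contiguous
 * copy of npages * PAGE_SIZE bytes; writing launch_dma at 0x0300 kicks
 * it off.
 */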

static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
	switch (drm->ttm.copy.oclass) {
	case PASCAL_DMA_COPY_A:
	case PASCAL_DMA_COPY_B:
	case VOLTA_DMA_COPY_A:
	case TURING_DMA_COPY_A:
		drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
		drm->dmem->migrate.chan = drm->ttm.chan;
		return 0;
	default:
		break;
	}
	return -ENODEV;
}
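
/*
 * Only the copy-engine classes listed above are wired up; they all accept
 * the method stream emitted by nvc0b5_migrate_copy() (hence the single
 * shared routine, named after the NVC0B5 class family). On anything
 * older, the -ENODEV makes nouveau_dmem_init() below bail out and device
 * memory support is quietly disabled.
 */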

void
nouveau_dmem_init(struct nouveau_drm *drm)
{
	struct device *device = drm->dev->dev;
	unsigned long i, size;
	int ret;

	/* This only makes sense on PASCAL or newer. */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;

	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunk_free);
	INIT_LIST_HEAD(&drm->dmem->chunk_full);
	INIT_LIST_HEAD(&drm->dmem->chunk_empty);

	size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

	/* Initialize migration dma helpers before registering memory */
	ret = nouveau_dmem_migrate_init(drm);
	if (ret) {
		kfree(drm->dmem);
		drm->dmem = NULL;
		return;
	}

	/*
	 * FIXME: we need some kind of policy to decide how much VRAM we
	 * want to register with HMM. For now just register everything,
	 * and later, if we want to do things like overcommit, we can
	 * revisit this.
	 */
	drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
					   device, size);
	if (IS_ERR(drm->dmem->devmem)) {
		kfree(drm->dmem);
		drm->dmem = NULL;
		return;
	}

	for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
		struct nouveau_dmem_chunk *chunk;
		struct page *page;
		unsigned long j;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (chunk == NULL) {
			nouveau_dmem_fini(drm);
			return;
		}

		chunk->drm = drm;
		chunk->pfn_first = drm->dmem->devmem->pfn_first;
		chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

		page = pfn_to_page(chunk->pfn_first);
		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
			hmm_devmem_page_set_drvdata(page, (long)chunk);
	}

	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
}
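
/*
 * For scale (assuming a 4 KiB PAGE_SIZE): a board with 4 GiB of usable
 * VRAM registers 4 GiB / 2 MiB = 2048 chunk structures here, each
 * tracking 512 device pages, all parked on chunk_empty until first use.
 */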

static void
nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
				    const unsigned long *src_pfns,
				    unsigned long *dst_pfns,
				    unsigned long start,
				    unsigned long end,
				    void *private)
{
	struct nouveau_migrate *migrate = private;
	struct nouveau_drm *drm = migrate->drm;
	struct device *dev = drm->dev->dev;
	unsigned long addr, i, npages = 0;
	nouveau_migrate_copy_t copy;
	int ret;

	/* First allocate new memory */
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
		struct page *dpage, *spage;

		dst_pfns[i] = 0;
		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = nouveau_dmem_page_alloc_locked(drm);
		if (!dpage)
			continue;

		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
			      MIGRATE_PFN_LOCKED |
			      MIGRATE_PFN_DEVICE;
		npages++;
	}

	if (!npages)
		return;

	/* Allocate storage for DMA addresses, so we can unmap later. */
	migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL);
	if (!migrate->dma)
		goto error;

	/* Copy things over */
	copy = drm->dmem->migrate.copy_func;
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
		struct nouveau_dmem_chunk *chunk;
		struct page *spage, *dpage;
		u64 src_addr, dst_addr;

		dpage = migrate_pfn_to_page(dst_pfns[i]);
		if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
			continue;

		chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
		dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
		dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
			nouveau_dmem_page_free_locked(drm, dpage);
			dst_pfns[i] = 0;
			continue;
		}

		migrate->dma[migrate->dma_nr] =
			dma_map_page_attrs(dev, spage, 0, PAGE_SIZE,
					   PCI_DMA_BIDIRECTIONAL,
					   DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) {
			nouveau_dmem_page_free_locked(drm, dpage);
			dst_pfns[i] = 0;
			continue;
		}

		src_addr = migrate->dma[migrate->dma_nr++];

		ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr,
			   NOUVEAU_APER_HOST, src_addr);
		if (ret) {
			nouveau_dmem_page_free_locked(drm, dpage);
			dst_pfns[i] = 0;
			continue;
		}
	}

	nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence);

	return;

error:
	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
		struct page *page;

		if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
			continue;

		page = migrate_pfn_to_page(dst_pfns[i]);
		dst_pfns[i] = MIGRATE_PFN_ERROR;
		if (page == NULL)
			continue;

		__free_page(page);
	}
}

void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
					   const unsigned long *src_pfns,
					   const unsigned long *dst_pfns,
					   unsigned long start,
					   unsigned long end,
					   void *private)
{
	struct nouveau_migrate *migrate = private;
	struct nouveau_drm *drm = migrate->drm;

	if (migrate->fence) {
		nouveau_fence_wait(migrate->fence, true, false);
		nouveau_fence_unref(&migrate->fence);
	} else {
		/*
		 * FIXME: wait for the channel to be IDLE before finalizing
		 * the hmem object below (nouveau_migrate_hmem_fini())?
		 */
	}

	while (migrate->dma_nr--) {
		dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr],
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	}
	kfree(migrate->dma);

	/*
	 * FIXME optimization: update the GPU page tables to point to the
	 * newly migrated memory.
	 */
}

static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
	.alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy,
	.finalize_and_map = nouveau_dmem_migrate_finalize_and_map,
};

int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			 struct vm_area_struct *vma,
			 unsigned long start,
			 unsigned long end)
{
	unsigned long *src_pfns, *dst_pfns, npages;
	struct nouveau_migrate migrate = {0};
	unsigned long i, c, max;
	int ret = 0;

	npages = (end - start) >> PAGE_SHIFT;
	max = min(SG_MAX_SINGLE_ALLOC, npages);
	src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
	if (src_pfns == NULL)
		return -ENOMEM;
	dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
	if (dst_pfns == NULL) {
		kfree(src_pfns);
		return -ENOMEM;
	}

	migrate.drm = drm;
	migrate.vma = vma;
	migrate.npages = npages;
	for (i = 0; i < npages; i += c) {
		unsigned long next;

		/* Clamp to the pages remaining, not the total count. */
		c = min(SG_MAX_SINGLE_ALLOC, npages - i);
		next = start + (c << PAGE_SHIFT);
		ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
				  next, src_pfns, dst_pfns, &migrate);
		if (ret)
			goto out;
		start = next;
	}

out:
	kfree(dst_pfns);
	kfree(src_pfns);
	return ret;
}
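
/*
 * A minimal usage sketch (hypothetical caller; the real callers are
 * whatever ioctl/SVM paths decide to move a range to VRAM), assuming
 * mmap_sem is held for read as migrate_vma() requires:
 *
 *	down_read(&mm->mmap_sem);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma->vm_start <= start && end <= vma->vm_end)
 *		ret = nouveau_dmem_migrate_vma(drm, vma, start, end);
 *	up_read(&mm->mmap_sem);
 *
 * start and end must be page aligned and fully covered by the VMA.
 */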

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
	if (!is_device_private_page(page))
		return false;

	if (drm->dmem->devmem != page->pgmap->data)
		return false;

	return true;
}

void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
			 struct hmm_range *range)
{
	unsigned long i, npages;

	npages = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < npages; ++i) {
		struct nouveau_dmem_chunk *chunk;
		struct page *page;
		uint64_t addr;

		page = hmm_pfn_to_page(range, range->pfns[i]);
		if (page == NULL)
			continue;

		if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE]))
			continue;

		if (!nouveau_dmem_page(drm, page)) {
			WARN(1, "Some unknown device memory!\n");
			range->pfns[i] = 0;
			continue;
		}

		/*
		 * Translate the device-private page to its VRAM address:
		 * the page's index within its chunk plus the chunk BO's
		 * VRAM offset, in pages, shifted up to a byte address.
		 */
		chunk = (void *)hmm_devmem_page_get_drvdata(page);
		addr = page_to_pfn(page) - chunk->pfn_first;
		addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;

		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
	}
}